airbyte-cdk 6.37.2.dev1__py3-none-any.whl → 6.38.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -7
- airbyte_cdk/sources/declarative/datetime/__init__.py +0 -4
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +11 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +72 -5
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -3
- airbyte_cdk/sources/declarative/interpolation/macros.py +3 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +34 -8
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +120 -8
- airbyte_cdk/sources/declarative/requesters/README.md +5 -5
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +18 -13
- airbyte_cdk/sources/declarative/requesters/http_requester.py +49 -17
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
- airbyte_cdk/sources/types.py +1 -0
- airbyte_cdk/utils/mapping_helpers.py +18 -1
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/METADATA +4 -4
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/RECORD +30 -34
- airbyte_cdk/sources/embedded/__init__.py +0 -3
- airbyte_cdk/sources/embedded/base_integration.py +0 -61
- airbyte_cdk/sources/embedded/catalog.py +0 -57
- airbyte_cdk/sources/embedded/runner.py +0 -57
- airbyte_cdk/sources/embedded/tools.py +0 -27
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/entry_points.txt +0 -0
@@ -56,7 +56,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
|
|
56
56
|
)
|
57
57
|
from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
|
58
58
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
59
|
-
from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
|
59
|
+
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
60
60
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
61
61
|
from airbyte_cdk.sources.declarative.decoders import (
|
62
62
|
Decoder,
|
@@ -245,6 +245,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
245
245
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
246
246
|
HttpResponseFilter as HttpResponseFilterModel,
|
247
247
|
)
|
248
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
249
|
+
IncrementingCountCursor as IncrementingCountCursorModel,
|
250
|
+
)
|
248
251
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
249
252
|
InlineSchemaLoader as InlineSchemaLoaderModel,
|
250
253
|
)
|
@@ -496,6 +499,9 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
|
|
496
499
|
CustomFormatConcurrentStreamStateConverter,
|
497
500
|
DateTimeStreamStateConverter,
|
498
501
|
)
|
502
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
|
503
|
+
IncrementingCountStreamStateConverter,
|
504
|
+
)
|
499
505
|
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
|
500
506
|
from airbyte_cdk.sources.types import Config
|
501
507
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
@@ -584,6 +590,7 @@ class ModelToComponentFactory:
|
|
584
590
|
FlattenFieldsModel: self.create_flatten_fields,
|
585
591
|
DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
|
586
592
|
IterableDecoderModel: self.create_iterable_decoder,
|
593
|
+
IncrementingCountCursorModel: self.create_incrementing_count_cursor,
|
587
594
|
XmlDecoderModel: self.create_xml_decoder,
|
588
595
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
589
596
|
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
@@ -1189,6 +1196,70 @@ class ModelToComponentFactory:
|
|
1189
1196
|
clamping_strategy=clamping_strategy,
|
1190
1197
|
)
|
1191
1198
|
|
1199
|
+
def create_concurrent_cursor_from_incrementing_count_cursor(
|
1200
|
+
self,
|
1201
|
+
model_type: Type[BaseModel],
|
1202
|
+
component_definition: ComponentDefinition,
|
1203
|
+
stream_name: str,
|
1204
|
+
stream_namespace: Optional[str],
|
1205
|
+
config: Config,
|
1206
|
+
message_repository: Optional[MessageRepository] = None,
|
1207
|
+
**kwargs: Any,
|
1208
|
+
) -> ConcurrentCursor:
|
1209
|
+
# Per-partition incremental streams can dynamically create child cursors which will pass their current
|
1210
|
+
# state via the stream_state keyword argument. Incremental syncs without parent streams use the
|
1211
|
+
# incoming state and connector_state_manager that is initialized when the component factory is created
|
1212
|
+
stream_state = (
|
1213
|
+
self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
|
1214
|
+
if "stream_state" not in kwargs
|
1215
|
+
else kwargs["stream_state"]
|
1216
|
+
)
|
1217
|
+
|
1218
|
+
component_type = component_definition.get("type")
|
1219
|
+
if component_definition.get("type") != model_type.__name__:
|
1220
|
+
raise ValueError(
|
1221
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
1222
|
+
)
|
1223
|
+
|
1224
|
+
incrementing_count_cursor_model = model_type.parse_obj(component_definition)
|
1225
|
+
|
1226
|
+
if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
|
1227
|
+
raise ValueError(
|
1228
|
+
f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
|
1229
|
+
)
|
1230
|
+
|
1231
|
+
interpolated_start_value = (
|
1232
|
+
InterpolatedString.create(
|
1233
|
+
incrementing_count_cursor_model.start_value, # type: ignore
|
1234
|
+
parameters=incrementing_count_cursor_model.parameters or {},
|
1235
|
+
)
|
1236
|
+
if incrementing_count_cursor_model.start_value
|
1237
|
+
else 0
|
1238
|
+
)
|
1239
|
+
|
1240
|
+
interpolated_cursor_field = InterpolatedString.create(
|
1241
|
+
incrementing_count_cursor_model.cursor_field,
|
1242
|
+
parameters=incrementing_count_cursor_model.parameters or {},
|
1243
|
+
)
|
1244
|
+
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
1245
|
+
|
1246
|
+
connector_state_converter = IncrementingCountStreamStateConverter(
|
1247
|
+
is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
|
1248
|
+
)
|
1249
|
+
|
1250
|
+
return ConcurrentCursor(
|
1251
|
+
stream_name=stream_name,
|
1252
|
+
stream_namespace=stream_namespace,
|
1253
|
+
stream_state=stream_state,
|
1254
|
+
message_repository=message_repository or self._message_repository,
|
1255
|
+
connector_state_manager=self._connector_state_manager,
|
1256
|
+
connector_state_converter=connector_state_converter,
|
1257
|
+
cursor_field=cursor_field,
|
1258
|
+
slice_boundary_fields=None,
|
1259
|
+
start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1260
|
+
end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1261
|
+
)
|
1262
|
+
|
1192
1263
|
def _assemble_weekday(self, weekday: str) -> Weekday:
|
1193
1264
|
match weekday:
|
1194
1265
|
case "MONDAY":
|
@@ -1622,6 +1693,31 @@ class ModelToComponentFactory:
|
|
1622
1693
|
config=config,
|
1623
1694
|
parameters=model.parameters or {},
|
1624
1695
|
)
|
1696
|
+
elif model.incremental_sync and isinstance(
|
1697
|
+
model.incremental_sync, IncrementingCountCursorModel
|
1698
|
+
):
|
1699
|
+
cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore
|
1700
|
+
|
1701
|
+
start_time_option = (
|
1702
|
+
self._create_component_from_model(
|
1703
|
+
cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
|
1704
|
+
config,
|
1705
|
+
parameters=cursor_model.parameters or {},
|
1706
|
+
)
|
1707
|
+
if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
|
1708
|
+
else None
|
1709
|
+
)
|
1710
|
+
|
1711
|
+
# The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
|
1712
|
+
# the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
|
1713
|
+
partition_field_start = "start"
|
1714
|
+
|
1715
|
+
request_options_provider = DatetimeBasedRequestOptionsProvider(
|
1716
|
+
start_time_option=start_time_option,
|
1717
|
+
partition_field_start=partition_field_start,
|
1718
|
+
config=config,
|
1719
|
+
parameters=model.parameters or {},
|
1720
|
+
)
|
1625
1721
|
else:
|
1626
1722
|
request_options_provider = None
|
1627
1723
|
|
@@ -2111,6 +2207,22 @@ class ModelToComponentFactory:
|
|
2111
2207
|
stream_response=False if self._emit_connector_builder_messages else True,
|
2112
2208
|
)
|
2113
2209
|
|
2210
|
+
@staticmethod
|
2211
|
+
def create_incrementing_count_cursor(
|
2212
|
+
model: IncrementingCountCursorModel, config: Config, **kwargs: Any
|
2213
|
+
) -> DatetimeBasedCursor:
|
2214
|
+
# This should not actually get used anywhere at runtime, but needed to add this to pass checks since
|
2215
|
+
# we still parse models into components. The issue is that there's no runtime implementation of a
|
2216
|
+
# IncrementingCountCursor.
|
2217
|
+
# A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor.
|
2218
|
+
return DatetimeBasedCursor(
|
2219
|
+
cursor_field=model.cursor_field,
|
2220
|
+
datetime_format="%Y-%m-%d",
|
2221
|
+
start_datetime="2024-12-12",
|
2222
|
+
config=config,
|
2223
|
+
parameters={},
|
2224
|
+
)
|
2225
|
+
|
2114
2226
|
@staticmethod
|
2115
2227
|
def create_iterable_decoder(
|
2116
2228
|
model: IterableDecoderModel, config: Config, **kwargs: Any
|
@@ -2744,32 +2856,32 @@ class ModelToComponentFactory:
|
|
2744
2856
|
if model.delete_requester
|
2745
2857
|
else None
|
2746
2858
|
)
|
2747
|
-
|
2859
|
+
download_target_requester = (
|
2748
2860
|
self._create_component_from_model(
|
2749
|
-
model=model.
|
2861
|
+
model=model.download_target_requester,
|
2750
2862
|
decoder=decoder,
|
2751
2863
|
config=config,
|
2752
2864
|
name=f"job extract_url - {name}",
|
2753
2865
|
)
|
2754
|
-
if model.
|
2866
|
+
if model.download_target_requester
|
2755
2867
|
else None
|
2756
2868
|
)
|
2757
2869
|
status_extractor = self._create_component_from_model(
|
2758
2870
|
model=model.status_extractor, decoder=decoder, config=config, name=name
|
2759
2871
|
)
|
2760
|
-
|
2761
|
-
model=model.
|
2872
|
+
download_target_extractor = self._create_component_from_model(
|
2873
|
+
model=model.download_target_extractor, decoder=decoder, config=config, name=name
|
2762
2874
|
)
|
2763
2875
|
job_repository: AsyncJobRepository = AsyncHttpJobRepository(
|
2764
2876
|
creation_requester=creation_requester,
|
2765
2877
|
polling_requester=polling_requester,
|
2766
2878
|
download_retriever=download_retriever,
|
2767
|
-
|
2879
|
+
download_target_requester=download_target_requester,
|
2768
2880
|
abort_requester=abort_requester,
|
2769
2881
|
delete_requester=delete_requester,
|
2770
2882
|
status_extractor=status_extractor,
|
2771
2883
|
status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
|
2772
|
-
|
2884
|
+
download_target_extractor=download_target_extractor,
|
2773
2885
|
)
|
2774
2886
|
|
2775
2887
|
async_job_partition_router = AsyncJobPartitionRouter(
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# AsyncHttpJobRepository sequence diagram
|
2
2
|
|
3
3
|
- Components marked as optional are not required and can be ignored.
|
4
|
-
- if `
|
5
|
-
- interpolation_context, e.g. `
|
4
|
+
- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
|
5
|
+
- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice
|
6
6
|
|
7
7
|
```mermaid
|
8
8
|
---
|
@@ -12,7 +12,7 @@ sequenceDiagram
|
|
12
12
|
participant AsyncHttpJobRepository as AsyncOrchestrator
|
13
13
|
participant CreationRequester as creation_requester
|
14
14
|
participant PollingRequester as polling_requester
|
15
|
-
participant UrlRequester as
|
15
|
+
participant UrlRequester as download_target_requester (Optional)
|
16
16
|
participant DownloadRetriever as download_retriever
|
17
17
|
participant AbortRequester as abort_requester (Optional)
|
18
18
|
participant DeleteRequester as delete_requester (Optional)
|
@@ -25,14 +25,14 @@ sequenceDiagram
|
|
25
25
|
|
26
26
|
loop Poll for job status
|
27
27
|
AsyncHttpJobRepository ->> PollingRequester: Check job status
|
28
|
-
PollingRequester ->> Reporting Server: Status request (interpolation_context: `
|
28
|
+
PollingRequester ->> Reporting Server: Status request (interpolation_context: `creation_response`)
|
29
29
|
Reporting Server -->> PollingRequester: Status response
|
30
30
|
PollingRequester -->> AsyncHttpJobRepository: Job status
|
31
31
|
end
|
32
32
|
|
33
33
|
alt Status: Ready
|
34
34
|
AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
|
35
|
-
UrlRequester ->> Reporting Server: URL request (interpolation_context: `
|
35
|
+
UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_response`)
|
36
36
|
Reporting Server -->> UrlRequester: Download URLs
|
37
37
|
UrlRequester -->> AsyncHttpJobRepository: Download URLs
|
38
38
|
|
@@ -43,13 +43,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
43
43
|
delete_requester: Optional[Requester]
|
44
44
|
status_extractor: DpathExtractor
|
45
45
|
status_mapping: Mapping[str, AsyncJobStatus]
|
46
|
-
|
46
|
+
download_target_extractor: DpathExtractor
|
47
47
|
|
48
48
|
job_timeout: Optional[timedelta] = None
|
49
49
|
record_extractor: RecordExtractor = field(
|
50
50
|
init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
|
51
51
|
)
|
52
|
-
|
52
|
+
download_target_requester: Optional[Requester] = (
|
53
53
|
None # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
|
54
54
|
)
|
55
55
|
|
@@ -211,12 +211,15 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
211
211
|
|
212
212
|
"""
|
213
213
|
|
214
|
-
for
|
214
|
+
for target_url in self._get_download_targets(job):
|
215
215
|
job_slice = job.job_parameters()
|
216
216
|
stream_slice = StreamSlice(
|
217
217
|
partition=job_slice.partition,
|
218
218
|
cursor_slice=job_slice.cursor_slice,
|
219
|
-
extra_fields={
|
219
|
+
extra_fields={
|
220
|
+
**job_slice.extra_fields,
|
221
|
+
"download_target": target_url,
|
222
|
+
},
|
220
223
|
)
|
221
224
|
for message in self.download_retriever.read_records({}, stream_slice):
|
222
225
|
if isinstance(message, Record):
|
@@ -269,27 +272,29 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
269
272
|
del self._polling_job_response_by_id[job_id]
|
270
273
|
|
271
274
|
def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
|
275
|
+
creation_response = self._create_job_response_by_id[job.api_job_id()].json()
|
272
276
|
stream_slice = StreamSlice(
|
273
|
-
partition={
|
277
|
+
partition={},
|
274
278
|
cursor_slice={},
|
279
|
+
extra_fields={"creation_response": creation_response},
|
275
280
|
)
|
276
281
|
return stream_slice
|
277
282
|
|
278
|
-
def
|
279
|
-
if not self.
|
283
|
+
def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
|
284
|
+
if not self.download_target_requester:
|
280
285
|
url_response = self._polling_job_response_by_id[job.api_job_id()]
|
281
286
|
else:
|
287
|
+
polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
|
282
288
|
stream_slice: StreamSlice = StreamSlice(
|
283
|
-
partition={
|
284
|
-
"polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
|
285
|
-
},
|
289
|
+
partition={},
|
286
290
|
cursor_slice={},
|
291
|
+
extra_fields={"polling_response": polling_response},
|
287
292
|
)
|
288
|
-
url_response = self.
|
293
|
+
url_response = self.download_target_requester.send_request(stream_slice=stream_slice) # type: ignore # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
|
289
294
|
if not url_response:
|
290
295
|
raise AirbyteTracedException(
|
291
|
-
internal_message="Always expect a response or an exception from
|
296
|
+
internal_message="Always expect a response or an exception from download_target_requester",
|
292
297
|
failure_type=FailureType.system_error,
|
293
298
|
)
|
294
299
|
|
295
|
-
yield from self.
|
300
|
+
yield from self.download_target_extractor.extract_records(url_response) # type: ignore # we expect download_target_extractor to always return list of strings
|
@@ -25,8 +25,8 @@ from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
|
25
25
|
from airbyte_cdk.sources.streams.call_rate import APIBudget
|
26
26
|
from airbyte_cdk.sources.streams.http import HttpClient
|
27
27
|
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
|
28
|
-
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
29
|
-
from airbyte_cdk.utils.mapping_helpers import combine_mappings
|
28
|
+
from airbyte_cdk.sources.types import Config, EmptyString, StreamSlice, StreamState
|
29
|
+
from airbyte_cdk.utils.mapping_helpers import combine_mappings, get_interpolation_context
|
30
30
|
|
31
31
|
|
32
32
|
@dataclass
|
@@ -49,9 +49,10 @@ class HttpRequester(Requester):
|
|
49
49
|
|
50
50
|
name: str
|
51
51
|
url_base: Union[InterpolatedString, str]
|
52
|
-
path: Union[InterpolatedString, str]
|
53
52
|
config: Config
|
54
53
|
parameters: InitVar[Mapping[str, Any]]
|
54
|
+
|
55
|
+
path: Optional[Union[InterpolatedString, str]] = None
|
55
56
|
authenticator: Optional[DeclarativeAuthenticator] = None
|
56
57
|
http_method: Union[str, HttpMethod] = HttpMethod.GET
|
57
58
|
request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
|
@@ -66,7 +67,9 @@ class HttpRequester(Requester):
|
|
66
67
|
|
67
68
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
68
69
|
self._url_base = InterpolatedString.create(self.url_base, parameters=parameters)
|
69
|
-
self._path = InterpolatedString.create(
|
70
|
+
self._path = InterpolatedString.create(
|
71
|
+
self.path if self.path else EmptyString, parameters=parameters
|
72
|
+
)
|
70
73
|
if self.request_options_provider is None:
|
71
74
|
self._request_options_provider = InterpolatedRequestOptionsProvider(
|
72
75
|
config=self.config, parameters=parameters
|
@@ -85,7 +88,7 @@ class HttpRequester(Requester):
|
|
85
88
|
self._parameters = parameters
|
86
89
|
|
87
90
|
if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
|
88
|
-
backoff_strategies = self.error_handler.backoff_strategies
|
91
|
+
backoff_strategies = self.error_handler.backoff_strategies # type: ignore
|
89
92
|
else:
|
90
93
|
backoff_strategies = None
|
91
94
|
|
@@ -112,21 +115,33 @@ class HttpRequester(Requester):
|
|
112
115
|
def get_authenticator(self) -> DeclarativeAuthenticator:
|
113
116
|
return self._authenticator
|
114
117
|
|
115
|
-
def get_url_base(
|
116
|
-
|
118
|
+
def get_url_base(
|
119
|
+
self,
|
120
|
+
*,
|
121
|
+
stream_state: Optional[StreamState] = None,
|
122
|
+
stream_slice: Optional[StreamSlice] = None,
|
123
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
124
|
+
) -> str:
|
125
|
+
interpolation_context = get_interpolation_context(
|
126
|
+
stream_state=stream_state,
|
127
|
+
stream_slice=stream_slice,
|
128
|
+
next_page_token=next_page_token,
|
129
|
+
)
|
130
|
+
return os.path.join(self._url_base.eval(self.config, **interpolation_context), EmptyString)
|
117
131
|
|
118
132
|
def get_path(
|
119
133
|
self,
|
120
134
|
*,
|
121
|
-
stream_state: Optional[StreamState],
|
122
|
-
stream_slice: Optional[StreamSlice],
|
123
|
-
next_page_token: Optional[Mapping[str, Any]],
|
135
|
+
stream_state: Optional[StreamState] = None,
|
136
|
+
stream_slice: Optional[StreamSlice] = None,
|
137
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
124
138
|
) -> str:
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
139
|
+
interpolation_context = get_interpolation_context(
|
140
|
+
stream_state=stream_state,
|
141
|
+
stream_slice=stream_slice,
|
142
|
+
next_page_token=next_page_token,
|
143
|
+
)
|
144
|
+
path = str(self._path.eval(self.config, **interpolation_context))
|
130
145
|
return path.lstrip("/")
|
131
146
|
|
132
147
|
def get_method(self) -> HttpMethod:
|
@@ -324,7 +339,20 @@ class HttpRequester(Requester):
|
|
324
339
|
|
325
340
|
@classmethod
|
326
341
|
def _join_url(cls, url_base: str, path: str) -> str:
|
327
|
-
|
342
|
+
"""
|
343
|
+
Joins a base URL with a given path and returns the resulting URL with any trailing slash removed.
|
344
|
+
|
345
|
+
This method ensures that there are no duplicate slashes when concatenating the base URL and the path,
|
346
|
+
which is useful when the full URL is provided from an interpolation context.
|
347
|
+
|
348
|
+
Args:
|
349
|
+
url_base (str): The base URL to which the path will be appended.
|
350
|
+
path (str): The path to join with the base URL.
|
351
|
+
|
352
|
+
Returns:
|
353
|
+
str: The concatenated URL with the trailing slash (if any) removed.
|
354
|
+
"""
|
355
|
+
return urljoin(url_base, path).rstrip("/")
|
328
356
|
|
329
357
|
def send_request(
|
330
358
|
self,
|
@@ -341,7 +369,11 @@ class HttpRequester(Requester):
|
|
341
369
|
request, response = self._http_client.send_request(
|
342
370
|
http_method=self.get_method().value,
|
343
371
|
url=self._join_url(
|
344
|
-
self.get_url_base(
|
372
|
+
self.get_url_base(
|
373
|
+
stream_state=stream_state,
|
374
|
+
stream_slice=stream_slice,
|
375
|
+
next_page_token=next_page_token,
|
376
|
+
),
|
345
377
|
path
|
346
378
|
or self.get_path(
|
347
379
|
stream_state=stream_state,
|
@@ -25,6 +25,7 @@ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
|
25
25
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
26
26
|
from airbyte_cdk.utils.mapping_helpers import (
|
27
27
|
_validate_component_request_option_paths,
|
28
|
+
get_interpolation_context,
|
28
29
|
)
|
29
30
|
|
30
31
|
|
@@ -150,11 +151,22 @@ class DefaultPaginator(Paginator):
|
|
150
151
|
else:
|
151
152
|
return None
|
152
153
|
|
153
|
-
def path(
|
154
|
+
def path(
|
155
|
+
self,
|
156
|
+
next_page_token: Optional[Mapping[str, Any]],
|
157
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
158
|
+
stream_slice: Optional[StreamSlice] = None,
|
159
|
+
) -> Optional[str]:
|
154
160
|
token = next_page_token.get("next_page_token") if next_page_token else None
|
155
161
|
if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
|
162
|
+
# make additional interpolation context
|
163
|
+
interpolation_context = get_interpolation_context(
|
164
|
+
stream_state=stream_state,
|
165
|
+
stream_slice=stream_slice,
|
166
|
+
next_page_token=next_page_token,
|
167
|
+
)
|
156
168
|
# Replace url base to only return the path
|
157
|
-
return str(token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
|
169
|
+
return str(token).replace(self.url_base.eval(self.config, **interpolation_context), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
|
158
170
|
else:
|
159
171
|
return None
|
160
172
|
|
@@ -258,8 +270,17 @@ class PaginatorTestReadDecorator(Paginator):
|
|
258
270
|
response, last_page_size, last_record, last_page_token_value
|
259
271
|
)
|
260
272
|
|
261
|
-
def path(
|
262
|
-
|
273
|
+
def path(
|
274
|
+
self,
|
275
|
+
next_page_token: Optional[Mapping[str, Any]],
|
276
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
277
|
+
stream_slice: Optional[StreamSlice] = None,
|
278
|
+
) -> Optional[str]:
|
279
|
+
return self._decorated.path(
|
280
|
+
next_page_token=next_page_token,
|
281
|
+
stream_state=stream_state,
|
282
|
+
stream_slice=stream_slice,
|
283
|
+
)
|
263
284
|
|
264
285
|
def get_request_params(
|
265
286
|
self,
|
@@ -19,7 +19,12 @@ class NoPagination(Paginator):
|
|
19
19
|
|
20
20
|
parameters: InitVar[Mapping[str, Any]]
|
21
21
|
|
22
|
-
def path(
|
22
|
+
def path(
|
23
|
+
self,
|
24
|
+
next_page_token: Optional[Mapping[str, Any]],
|
25
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
26
|
+
stream_slice: Optional[StreamSlice] = None,
|
27
|
+
) -> Optional[str]:
|
23
28
|
return None
|
24
29
|
|
25
30
|
def get_request_params(
|
@@ -11,7 +11,7 @@ import requests
|
|
11
11
|
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
|
12
12
|
RequestOptionsProvider,
|
13
13
|
)
|
14
|
-
from airbyte_cdk.sources.types import Record
|
14
|
+
from airbyte_cdk.sources.types import Record, StreamSlice
|
15
15
|
|
16
16
|
|
17
17
|
@dataclass
|
@@ -49,7 +49,12 @@ class Paginator(ABC, RequestOptionsProvider):
|
|
49
49
|
pass
|
50
50
|
|
51
51
|
@abstractmethod
|
52
|
-
def path(
|
52
|
+
def path(
|
53
|
+
self,
|
54
|
+
next_page_token: Optional[Mapping[str, Any]],
|
55
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
56
|
+
stream_slice: Optional[StreamSlice] = None,
|
57
|
+
) -> Optional[str]:
|
53
58
|
"""
|
54
59
|
Returns the URL path to hit to fetch the next page of records
|
55
60
|
|
@@ -35,7 +35,13 @@ class Requester(RequestOptionsProvider):
|
|
35
35
|
pass
|
36
36
|
|
37
37
|
@abstractmethod
|
38
|
-
def get_url_base(
|
38
|
+
def get_url_base(
|
39
|
+
self,
|
40
|
+
*,
|
41
|
+
stream_state: Optional[StreamState],
|
42
|
+
stream_slice: Optional[StreamSlice],
|
43
|
+
next_page_token: Optional[Mapping[str, Any]],
|
44
|
+
) -> str:
|
39
45
|
"""
|
40
46
|
:return: URL base for the API endpoint e.g: if you wanted to hit https://myapi.com/v1/some_entity then this should return "https://myapi.com/v1/"
|
41
47
|
"""
|
@@ -234,13 +234,22 @@ class SimpleRetriever(Retriever):
|
|
234
234
|
raise ValueError("Request body json cannot be a string")
|
235
235
|
return body_json
|
236
236
|
|
237
|
-
def _paginator_path(
|
237
|
+
def _paginator_path(
|
238
|
+
self,
|
239
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
240
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
241
|
+
stream_slice: Optional[StreamSlice] = None,
|
242
|
+
) -> Optional[str]:
|
238
243
|
"""
|
239
244
|
If the paginator points to a path, follow it, else return nothing so the requester is used.
|
240
245
|
:param next_page_token:
|
241
246
|
:return:
|
242
247
|
"""
|
243
|
-
return self._paginator.path(
|
248
|
+
return self._paginator.path(
|
249
|
+
next_page_token=next_page_token,
|
250
|
+
stream_state=stream_state,
|
251
|
+
stream_slice=stream_slice,
|
252
|
+
)
|
244
253
|
|
245
254
|
def _parse_response(
|
246
255
|
self,
|
@@ -299,7 +308,11 @@ class SimpleRetriever(Retriever):
|
|
299
308
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
300
309
|
) -> Optional[requests.Response]:
|
301
310
|
return self.requester.send_request(
|
302
|
-
path=self._paginator_path(
|
311
|
+
path=self._paginator_path(
|
312
|
+
next_page_token=next_page_token,
|
313
|
+
stream_state=stream_state,
|
314
|
+
stream_slice=stream_slice,
|
315
|
+
),
|
303
316
|
stream_state=stream_state,
|
304
317
|
stream_slice=stream_slice,
|
305
318
|
next_page_token=next_page_token,
|
@@ -570,7 +583,11 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
|
|
570
583
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
571
584
|
) -> Optional[requests.Response]:
|
572
585
|
return self.requester.send_request(
|
573
|
-
path=self._paginator_path(
|
586
|
+
path=self._paginator_path(
|
587
|
+
next_page_token=next_page_token,
|
588
|
+
stream_state=stream_state,
|
589
|
+
stream_slice=stream_slice,
|
590
|
+
),
|
574
591
|
stream_state=stream_state,
|
575
592
|
stream_slice=stream_slice,
|
576
593
|
next_page_token=next_page_token,
|
@@ -6,7 +6,7 @@ import re
|
|
6
6
|
from dataclasses import dataclass
|
7
7
|
from typing import Any, Dict, List, Optional
|
8
8
|
|
9
|
-
import
|
9
|
+
import anyascii
|
10
10
|
|
11
11
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
12
12
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -48,7 +48,7 @@ class KeysToSnakeCaseTransformation(RecordTransformation):
|
|
48
48
|
return self.tokens_to_snake_case(tokens)
|
49
49
|
|
50
50
|
def normalize_key(self, key: str) -> str:
|
51
|
-
return
|
51
|
+
return str(anyascii.anyascii(key))
|
52
52
|
|
53
53
|
def tokenize_key(self, key: str) -> List[str]:
|
54
54
|
tokens = []
|
@@ -50,7 +50,6 @@ class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage
|
|
50
50
|
|
51
51
|
def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
|
52
52
|
extra_args["path_to_yaml"] = self._path_to_yaml
|
53
|
-
self.logger.debug("declarative source created from parsed YAML manifest", extra=extra_args)
|
54
53
|
|
55
54
|
@staticmethod
|
56
55
|
def _parse(connection_definition_str: str) -> ConnectionDefinition:
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from enum import Enum
|
7
|
-
from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional, Tuple
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
9
|
if TYPE_CHECKING:
|
10
10
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
|
|
12
12
|
|
13
13
|
class ConcurrencyCompatibleStateType(Enum):
|
14
14
|
date_range = "date-range"
|
15
|
+
integer = "integer"
|
15
16
|
|
16
17
|
|
17
18
|
class AbstractStreamStateConverter(ABC):
|