airbyte-cdk 7.0.3__py3-none-any.whl → 7.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +14 -5
- airbyte_cdk/sources/declarative/requesters/README.md +14 -3
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -6
- {airbyte_cdk-7.0.3.dist-info → airbyte_cdk-7.0.4.dist-info}/METADATA +1 -1
- {airbyte_cdk-7.0.3.dist-info → airbyte_cdk-7.0.4.dist-info}/RECORD +11 -11
- {airbyte_cdk-7.0.3.dist-info → airbyte_cdk-7.0.4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-7.0.3.dist-info → airbyte_cdk-7.0.4.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-7.0.3.dist-info → airbyte_cdk-7.0.4.dist-info}/WHEEL +0 -0
- {airbyte_cdk-7.0.3.dist-info → airbyte_cdk-7.0.4.dist-info}/entry_points.txt +0 -0
@@ -2852,8 +2852,8 @@ class AsyncRetriever(BaseModel):
|
|
2852
2852
|
status_extractor: Union[DpathExtractor, CustomRecordExtractor] = Field(
|
2853
2853
|
..., description="Responsible for fetching the actual status of the async job."
|
2854
2854
|
)
|
2855
|
-
download_target_extractor: Union[DpathExtractor, CustomRecordExtractor] = Field(
|
2856
|
-
|
2855
|
+
download_target_extractor: Optional[Union[DpathExtractor, CustomRecordExtractor]] = Field(
|
2856
|
+
None,
|
2857
2857
|
description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
|
2858
2858
|
)
|
2859
2859
|
download_extractor: Optional[
|
@@ -3467,6 +3467,11 @@ class ModelToComponentFactory:
|
|
3467
3467
|
transformations: List[RecordTransformation],
|
3468
3468
|
**kwargs: Any,
|
3469
3469
|
) -> AsyncRetriever:
|
3470
|
+
if model.download_target_requester and not model.download_target_extractor:
|
3471
|
+
raise ValueError(
|
3472
|
+
f"`download_target_extractor` required if using a `download_target_requester`"
|
3473
|
+
)
|
3474
|
+
|
3470
3475
|
def _get_download_retriever(
|
3471
3476
|
requester: Requester, extractor: RecordExtractor, _decoder: Decoder
|
3472
3477
|
) -> SimpleRetriever:
|
@@ -3624,11 +3629,15 @@ class ModelToComponentFactory:
|
|
3624
3629
|
status_extractor = self._create_component_from_model(
|
3625
3630
|
model=model.status_extractor, decoder=decoder, config=config, name=name
|
3626
3631
|
)
|
3627
|
-
download_target_extractor =
|
3628
|
-
|
3629
|
-
|
3630
|
-
|
3631
|
-
|
3632
|
+
download_target_extractor = (
|
3633
|
+
self._create_component_from_model(
|
3634
|
+
model=model.download_target_extractor,
|
3635
|
+
decoder=decoder,
|
3636
|
+
config=config,
|
3637
|
+
name=name,
|
3638
|
+
)
|
3639
|
+
if model.download_target_extractor
|
3640
|
+
else None
|
3632
3641
|
)
|
3633
3642
|
|
3634
3643
|
job_repository: AsyncJobRepository = AsyncHttpJobRepository(
|
@@ -1,8 +1,19 @@
|
|
1
|
+
# Download Target and Download Requester
|
2
|
+
|
3
|
+
- The `creation_response` and `polling_response` interpolation contexts are always available during the job download step of the process.
|
4
|
+
|
5
|
+
- The `download_target` interpolation context is generated by the `download_target_extractor` and made available to the job download step as well.
|
6
|
+
- If `download_target_requester` is not provided, `download_target_extractor` will get URLs from the `polling_response`.
|
7
|
+
- if `download_target_requester` is provided, an additional request will be made to fetch job download targets and `download_target_extractor` will operate on that response
|
8
|
+
|
9
|
+
## Some important considerations
|
10
|
+
|
11
|
+
- **Note:** If the `download_target_extractor` and `download_target_requester` are not defined, a single job download request will be made without the `download_target` context.
|
12
|
+
- **Note:** The `download_target_extractor` is required (not optional) if using a `download_target_requester`
|
13
|
+
|
1
14
|
# AsyncHttpJobRepository sequence diagram
|
2
15
|
|
3
16
|
- Components marked as optional are not required and can be ignored.
|
4
|
-
- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
|
5
|
-
- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice
|
6
17
|
|
7
18
|
```mermaid
|
8
19
|
---
|
@@ -37,7 +48,7 @@ sequenceDiagram
|
|
37
48
|
UrlRequester -->> AsyncHttpJobRepository: Download URLs
|
38
49
|
|
39
50
|
AsyncHttpJobRepository ->> DownloadRetriever: Download reports
|
40
|
-
DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `
|
51
|
+
DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `download_target`, `creation_response`, `polling_response`)
|
41
52
|
Reporting Server -->> DownloadRetriever: Report data
|
42
53
|
DownloadRetriever -->> AsyncHttpJobRepository: Report data
|
43
54
|
else Status: Failed
|
@@ -43,7 +43,7 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
43
43
|
delete_requester: Optional[Requester]
|
44
44
|
status_extractor: DpathExtractor
|
45
45
|
status_mapping: Mapping[str, AsyncJobStatus]
|
46
|
-
download_target_extractor: DpathExtractor
|
46
|
+
download_target_extractor: Optional[DpathExtractor]
|
47
47
|
|
48
48
|
# timeout for the job to be completed, passed from `polling_job_timeout`
|
49
49
|
job_timeout: Optional[timedelta] = None
|
@@ -213,14 +213,16 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
213
213
|
|
214
214
|
"""
|
215
215
|
|
216
|
-
for
|
216
|
+
for download_target in self._get_download_targets(job):
|
217
217
|
job_slice = job.job_parameters()
|
218
218
|
stream_slice = StreamSlice(
|
219
219
|
partition=job_slice.partition,
|
220
220
|
cursor_slice=job_slice.cursor_slice,
|
221
221
|
extra_fields={
|
222
222
|
**job_slice.extra_fields,
|
223
|
-
"download_target":
|
223
|
+
"download_target": download_target,
|
224
|
+
"creation_response": self._get_creation_response_interpolation_context(job),
|
225
|
+
"polling_response": self._get_polling_response_interpolation_context(job),
|
224
226
|
},
|
225
227
|
)
|
226
228
|
for message in self.download_retriever.read_records({}, stream_slice):
|
@@ -330,9 +332,27 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
330
332
|
)
|
331
333
|
|
332
334
|
def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
|
333
|
-
|
334
|
-
|
335
|
-
|
335
|
+
"""Returns an iterable of strings to help target requests for downloading async jobs."""
|
336
|
+
# If neither download_target_extractor nor download_target_requester are provided, yield a single empty string
|
337
|
+
# to express the need to make a single download request without any download_target value
|
338
|
+
if not self.download_target_extractor:
|
339
|
+
if not self.download_target_requester:
|
340
|
+
lazy_log(
|
341
|
+
LOGGER,
|
342
|
+
logging.DEBUG,
|
343
|
+
lambda: "No download_target_extractor or download_target_requester provided. Will attempt a single download request without a `download_target`.",
|
344
|
+
)
|
345
|
+
yield ""
|
346
|
+
return
|
347
|
+
else:
|
348
|
+
raise AirbyteTracedException(
|
349
|
+
internal_message="Must define a `download_target_extractor` when using a `download_target_requester`.",
|
350
|
+
failure_type=FailureType.config_error,
|
351
|
+
)
|
352
|
+
|
353
|
+
# We have a download_target_extractor, use it to extract the download_target
|
354
|
+
if self.download_target_requester:
|
355
|
+
# if a download_target_requester is defined, we extract from the response of a request specifically for download targets.
|
336
356
|
stream_slice: StreamSlice = StreamSlice(
|
337
357
|
partition={},
|
338
358
|
cursor_slice={},
|
@@ -346,5 +366,8 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
346
366
|
internal_message="Always expect a response or an exception from download_target_requester",
|
347
367
|
failure_type=FailureType.system_error,
|
348
368
|
)
|
369
|
+
else:
|
370
|
+
# if no download_target_requester is defined, we extract from the polling response
|
371
|
+
url_response = self._polling_job_response_by_id[job.api_job_id()]
|
349
372
|
|
350
373
|
yield from self.download_target_extractor.extract_records(url_response) # type: ignore # we expect download_target_extractor to always return list of strings
|
@@ -130,7 +130,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=MdXptT-2
|
|
130
130
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
131
131
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
|
132
132
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
133
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
133
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=U-cTJz0M00sYP2NSeLB29148suEQO8Z-LTPiixOoHoQ,187187
|
134
134
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
|
135
135
|
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=qB4lRUrCXLTE-a3VlpOLaazHiC7RIF_FIVJesuz7ebw,8078
|
136
136
|
airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=1PeKwuMK8x9dsA2zqUjSVinEWVSEgYcUS6npiW3aC2c,855
|
@@ -165,14 +165,14 @@ airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migrati
|
|
165
165
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
166
166
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
167
167
|
airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py,sha256=Imnj3yef0aqRdLfaUxkIYISUb8YkiPrRH_wBd-x8HjM,5999
|
168
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
168
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=ePqmIj2g8gPItvflsgKmRBm91qDPGgGyD_GQDUSqjCA,131552
|
169
169
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
170
170
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
|
171
171
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9Rbu5OexXSDN9QWDo8YU4tT9M2LDVOgGA,802
|
172
172
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=la9Ulpc0lQewiBLKJ0FpsWxyU5XISv-ulmFRHJLJ1Pc,11292
|
173
173
|
airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
|
174
174
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
|
175
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
175
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=X8qIjanJCog3GflyEoZ-5ECID-xqcfFQgW3E6AzyFNU,184572
|
176
176
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
|
177
177
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
178
178
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=ocm4hZ4k-tEGs5HLrtI8ecWSK0hGqNH0Rvz2byx_HZk,6927
|
@@ -181,7 +181,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
|
|
181
181
|
airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=v8C1farE9_UVnIN_PV7qnn2p3C_qK6DJ8b0-6JmsAPc,1288
|
182
182
|
airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=iP6eL6gYhGL8PuH9mXjL_PfqFyJ3xefmuFxf7yzy66s,1778
|
183
183
|
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=xV_xCTwlO2LiOBCrk9hiP1r6N3KatiAa9dRZUv27iWQ,18423
|
184
|
-
airbyte_cdk/sources/declarative/requesters/README.md,sha256=
|
184
|
+
airbyte_cdk/sources/declarative/requesters/README.md,sha256=D-3n-Hlf6IrouihdieY_lOWSZUeMwBFOlGRa_C0nJ3k,3450
|
185
185
|
airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
|
186
186
|
airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
|
187
187
|
airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
|
@@ -196,7 +196,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
|
|
196
196
|
airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
|
197
197
|
airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
|
198
198
|
airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=E-fQbt4ShfxZVoqfnmOx69C6FUPWZz8BIqI3DN9Kcjs,7935
|
199
|
-
airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=
|
199
|
+
airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=RLaQlvb-kL2FqnhuV3TJlJhSfnBSaDIgUgM81qFsCkY,15847
|
200
200
|
airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=cktdjnOu-o98smdCdrWC361iWNamPTZ-csT32OFh00c,18920
|
201
201
|
airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
|
202
202
|
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=kH_gwgULvEr8na-sST93gpr5pgqk8oT2aZNPebyMqlc,12293
|
@@ -457,9 +457,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
457
457
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=9YDJmnIGFsT51CVQf2tSSvTapGimITjEFGbUTSZAGTI,963
|
458
458
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
459
459
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
460
|
-
airbyte_cdk-7.0.
|
461
|
-
airbyte_cdk-7.0.
|
462
|
-
airbyte_cdk-7.0.
|
463
|
-
airbyte_cdk-7.0.
|
464
|
-
airbyte_cdk-7.0.
|
465
|
-
airbyte_cdk-7.0.
|
460
|
+
airbyte_cdk-7.0.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
461
|
+
airbyte_cdk-7.0.4.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
462
|
+
airbyte_cdk-7.0.4.dist-info/METADATA,sha256=mPycx2yG98V8BkQJp9vwCLHAjACLczgUquxx-jgf7og,6799
|
463
|
+
airbyte_cdk-7.0.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
464
|
+
airbyte_cdk-7.0.4.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
|
465
|
+
airbyte_cdk-7.0.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|