airbyte-cdk 7.0.2__py3-none-any.whl → 7.0.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -3797,7 +3797,6 @@ definitions:
       - polling_requester
       - download_requester
       - status_extractor
-      - download_target_extractor
     properties:
       type:
         type: string
@@ -2852,8 +2852,8 @@ class AsyncRetriever(BaseModel):
     status_extractor: Union[DpathExtractor, CustomRecordExtractor] = Field(
         ..., description="Responsible for fetching the actual status of the async job."
     )
-    download_target_extractor: Union[DpathExtractor, CustomRecordExtractor] = Field(
-        ...,
+    download_target_extractor: Optional[Union[DpathExtractor, CustomRecordExtractor]] = Field(
+        None,
         description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
     )
     download_extractor: Optional[
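
The hunk above turns `download_target_extractor` from a required field (`...`) into an optional one defaulting to `None`. A minimal, self-contained sketch of what that means for manifest validation, using a hypothetical stand-in model rather than the real `AsyncRetriever` (which has many more required fields):

```python
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


# Stand-in model, not the real AsyncRetriever: it only mirrors the
# required-vs-optional Field pattern shown in the hunk above.
class ExampleRetriever(BaseModel):
    status_extractor: str = Field(..., description="still required")
    download_target_extractor: Optional[str] = Field(
        None, description="now optional and may be omitted"
    )


ExampleRetriever(status_extractor="dpath")  # validates without download_target_extractor

try:
    ExampleRetriever(download_target_extractor="dpath")  # still fails: status_extractor missing
except ValidationError as exc:
    print(exc)
```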
@@ -1976,7 +1976,10 @@ class ModelToComponentFactory:
         primary_key = model.primary_key.__root__ if model.primary_key else None

         partition_router = self._build_stream_slicer_from_partition_router(
-            model.retriever, config, stream_name=model.name
+            model.retriever,
+            config,
+            stream_name=model.name,
+            **kwargs,
         )
         concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
@@ -2155,10 +2158,11 @@ class ModelToComponentFactory:
         ],
         config: Config,
         stream_name: Optional[str] = None,
+        **kwargs: Any,
     ) -> PartitionRouter:
         if (
             hasattr(model, "partition_router")
-            and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
+            and isinstance(model, (SimpleRetrieverModel, AsyncRetrieverModel, CustomRetrieverModel))
             and model.partition_router
         ):
             stream_slicer_model = model.partition_router
@@ -2172,6 +2176,23 @@ class ModelToComponentFactory:
                     ],
                     parameters={},
                 )
+            elif isinstance(stream_slicer_model, dict):
+                # partition router comes from CustomRetrieverModel therefore has not been parsed as a model
+                params = stream_slicer_model.get("$parameters")
+                if not isinstance(params, dict):
+                    params = {}
+                    stream_slicer_model["$parameters"] = params
+
+                if stream_name is not None:
+                    params["stream_name"] = stream_name
+
+                return self._create_nested_component( # type: ignore[no-any-return] # There is no guarantee that this will return a stream slicer. If not, we expect an AttributeError during the call to `stream_slices`
+                    model,
+                    "partition_router",
+                    stream_slicer_model,
+                    config,
+                    **kwargs,
+                )
             else:
                 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
                     model=stream_slicer_model, config=config, stream_name=stream_name or ""
@@ -2886,7 +2907,7 @@ class ModelToComponentFactory:
         )

     def create_parent_stream_config(
-        self, model: ParentStreamConfigModel, config: Config, stream_name: str, **kwargs: Any
+        self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
     ) -> ParentStreamConfig:
         declarative_stream = self._create_component_from_model(
             model.stream,
@@ -3446,6 +3467,11 @@ class ModelToComponentFactory:
         transformations: List[RecordTransformation],
         **kwargs: Any,
     ) -> AsyncRetriever:
+        if model.download_target_requester and not model.download_target_extractor:
+            raise ValueError(
+                f"`download_target_extractor` required if using a `download_target_requester`"
+            )
+
         def _get_download_retriever(
             requester: Requester, extractor: RecordExtractor, _decoder: Decoder
         ) -> SimpleRetriever:
@@ -3603,11 +3629,15 @@ class ModelToComponentFactory:
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
-        download_target_extractor = self._create_component_from_model(
-            model=model.download_target_extractor,
-            decoder=decoder,
-            config=config,
-            name=name,
+        download_target_extractor = (
+            self._create_component_from_model(
+                model=model.download_target_extractor,
+                decoder=decoder,
+                config=config,
+                name=name,
+            )
+            if model.download_target_extractor
+            else None
         )

         job_repository: AsyncJobRepository = AsyncHttpJobRepository(
@@ -3693,14 +3723,19 @@ class ModelToComponentFactory:
         )

     def create_substream_partition_router(
-        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
+        self,
+        model: SubstreamPartitionRouterModel,
+        config: Config,
+        *,
+        stream_name: str,
+        **kwargs: Any,
     ) -> SubstreamPartitionRouter:
         parent_stream_configs = []
         if model.parent_stream_configs:
             parent_stream_configs.extend(
                 [
                     self.create_parent_stream_config_with_substream_wrapper(
-                        model=parent_stream_config, config=config, **kwargs
+                        model=parent_stream_config, config=config, stream_name=stream_name, **kwargs
                     )
                     for parent_stream_config in model.parent_stream_configs
                 ]
@@ -3720,7 +3755,7 @@ class ModelToComponentFactory:

         # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
         has_parent_state = bool(
-            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
+            self._connector_state_manager.get_stream_state(stream_name, None)
             if model.incremental_dependency
             else False
         )
@@ -4113,11 +4148,17 @@ class ModelToComponentFactory:
         )

     def create_grouping_partition_router(
-        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
+        self,
+        model: GroupingPartitionRouterModel,
+        config: Config,
+        *,
+        stream_name: str,
+        **kwargs: Any,
     ) -> GroupingPartitionRouter:
         underlying_router = self._create_component_from_model(
             model=model.underlying_partition_router,
             config=config,
+            stream_name=stream_name,
             **kwargs,
         )
         if model.group_size < 1:
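
Several of the factory methods above (`create_parent_stream_config`, `create_substream_partition_router`, `create_grouping_partition_router`) also gain a bare `*` in their signatures, making `stream_name` keyword-only. A generic illustration of the effect, using a hypothetical function rather than CDK code:

```python
from typing import Any


# Hypothetical function mirroring the new signatures: stream_name must be passed by keyword.
def create_router(model: Any, config: dict, *, stream_name: str, **kwargs: Any) -> str:
    return f"router for {stream_name!r}"


print(create_router(object(), {}, stream_name="orders"))  # OK
# create_router(object(), {}, "orders")  # raises TypeError: stream_name is keyword-only
```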
@@ -1,8 +1,19 @@
+# Download Target and Download Requester
+
+- The `creation_response` and `polling_response` interpolation contexts are always available during the job download step of the process.
+
+- The `download_target` interpolation context is generated by the `download_target_extractor` and made available to the job download step as well.
+- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
+- if `download_target_requester` is provided, an additional request will be made to fetch job download targets and `download_target_extractor` will operate on that response
+
+## Some important considerations
+
+- **Note:** If the `download_target_extractor` and `download_target_requester` are not defined, a single job download request will be made without the `download_target` context.
+- **Note:** The `download_target_extractor` is required (not optional) if using a `download_target_requester`
+
 # AsyncHttpJobRepository sequence diagram

 - Components marked as optional are not required and can be ignored.
-- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
-- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice

 ```mermaid
 ---
@@ -37,7 +48,7 @@ sequenceDiagram
     UrlRequester -->> AsyncHttpJobRepository: Download URLs

     AsyncHttpJobRepository ->> DownloadRetriever: Download reports
-    DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `url`)
+    DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `download_target`, `creation_response`, `polling_response`)
     Reporting Server -->> DownloadRetriever: Report data
     DownloadRetriever -->> AsyncHttpJobRepository: Report data
     else Status: Failed
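
The README hunks above spell out how the download step chooses its targets. A small hypothetical helper (not part of airbyte-cdk) that restates those rules as code, for quick reference:

```python
# Hypothetical helper restating the README rules above; it is not part of airbyte-cdk.
def download_target_source(has_extractor: bool, has_requester: bool) -> str:
    if not has_extractor and not has_requester:
        return "single download request, no `download_target` in the interpolation context"
    if not has_extractor and has_requester:
        return "invalid: `download_target_extractor` is required with a `download_target_requester`"
    if has_extractor and not has_requester:
        return "targets extracted from the `polling_response`"
    return "targets extracted from the `download_target_requester` response"


for extractor, requester in [(False, False), (False, True), (True, False), (True, True)]:
    print(extractor, requester, "->", download_target_source(extractor, requester))
```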
@@ -43,7 +43,7 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     delete_requester: Optional[Requester]
     status_extractor: DpathExtractor
     status_mapping: Mapping[str, AsyncJobStatus]
-    download_target_extractor: DpathExtractor
+    download_target_extractor: Optional[DpathExtractor]

     # timeout for the job to be completed, passed from `polling_job_timeout`
     job_timeout: Optional[timedelta] = None
@@ -213,14 +213,16 @@ class AsyncHttpJobRepository(AsyncJobRepository):

        """

-        for target_url in self._get_download_targets(job):
+        for download_target in self._get_download_targets(job):
            job_slice = job.job_parameters()
            stream_slice = StreamSlice(
                partition=job_slice.partition,
                cursor_slice=job_slice.cursor_slice,
                extra_fields={
                    **job_slice.extra_fields,
-                    "download_target": target_url,
+                    "download_target": download_target,
+                    "creation_response": self._get_creation_response_interpolation_context(job),
+                    "polling_response": self._get_polling_response_interpolation_context(job),
                },
            )
            for message in self.download_retriever.read_records({}, stream_slice):
@@ -330,9 +332,27 @@ class AsyncHttpJobRepository(AsyncJobRepository):
        )

    def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
-        if not self.download_target_requester:
-            url_response = self._polling_job_response_by_id[job.api_job_id()]
-        else:
+        """Returns an iterable of strings to help target requests for downloading async jobs."""
+        # If neither download_target_extractor nor download_target_requester are provided, yield a single empty string
+        # to express the need to make a single download request without any download_target value
+        if not self.download_target_extractor:
+            if not self.download_target_requester:
+                lazy_log(
+                    LOGGER,
+                    logging.DEBUG,
+                    lambda: "No download_target_extractor or download_target_requester provided. Will attempt a single download request without a `download_target`.",
+                )
+                yield ""
+                return
+            else:
+                raise AirbyteTracedException(
+                    internal_message="Must define a `download_target_extractor` when using a `download_target_requester`.",
+                    failure_type=FailureType.config_error,
+                )
+
+        # We have a download_target_extractor, use it to extract the download_target
+        if self.download_target_requester:
+            # if a download_target_requester is defined, we extract from the response of a request specifically for download targets.
            stream_slice: StreamSlice = StreamSlice(
                partition={},
                cursor_slice={},
@@ -346,5 +366,8 @@ class AsyncHttpJobRepository(AsyncJobRepository):
                    internal_message="Always expect a response or an exception from download_target_requester",
                    failure_type=FailureType.system_error,
                )
+        else:
+            # if no download_target_requester is defined, we extract from the polling response
+            url_response = self._polling_job_response_by_id[job.api_job_id()]

        yield from self.download_target_extractor.extract_records(url_response) # type: ignore # we expect download_target_extractor to always return list of strings
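
For readers unfamiliar with the `lazy_log` call in the `_get_download_targets` hunk above, a plausible sketch of such a helper is shown below; the real implementation ships with `airbyte_cdk` and may differ in detail:

```python
import logging
from typing import Callable


# Plausible sketch of a lazy_log-style helper (assumption: the real airbyte_cdk version
# may differ). The message is produced by a callable, so the string is only built when
# the requested level is actually enabled on the logger.
def lazy_log(logger: logging.Logger, level: int, lazy_message: Callable[[], str]) -> None:
    if logger.isEnabledFor(level):
        logger.log(level, lazy_message())
```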
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 7.0.2
+Version: 7.0.4
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -130,7 +130,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=MdXptT-2
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=L-xvZ3tKpBQBYLoh9SwoPyQCoEbr8QywyYITF0kXf7w,187221
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=U-cTJz0M00sYP2NSeLB29148suEQO8Z-LTPiixOoHoQ,187187
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
 airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=qB4lRUrCXLTE-a3VlpOLaazHiC7RIF_FIVJesuz7ebw,8078
 airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=1PeKwuMK8x9dsA2zqUjSVinEWVSEgYcUS6npiW3aC2c,855
@@ -165,14 +165,14 @@ airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migrati
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
 airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py,sha256=Imnj3yef0aqRdLfaUxkIYISUb8YkiPrRH_wBd-x8HjM,5999
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=0ttIVcgsg0ZKyCfKXr1DP3FGcSlOPVbTeJkMI6y_utA,131541
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=ePqmIj2g8gPItvflsgKmRBm91qDPGgGyD_GQDUSqjCA,131552
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9Rbu5OexXSDN9QWDo8YU4tT9M2LDVOgGA,802
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=la9Ulpc0lQewiBLKJ0FpsWxyU5XISv-ulmFRHJLJ1Pc,11292
 airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=G1IR4MW9lOlIJyebJzIKzqhM9VI9yi-fUQ-BqooFhCw,183093
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=X8qIjanJCog3GflyEoZ-5ECID-xqcfFQgW3E6AzyFNU,184572
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=ocm4hZ4k-tEGs5HLrtI8ecWSK0hGqNH0Rvz2byx_HZk,6927
@@ -181,7 +181,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
 airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=v8C1farE9_UVnIN_PV7qnn2p3C_qK6DJ8b0-6JmsAPc,1288
 airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=iP6eL6gYhGL8PuH9mXjL_PfqFyJ3xefmuFxf7yzy66s,1778
 airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=xV_xCTwlO2LiOBCrk9hiP1r6N3KatiAa9dRZUv27iWQ,18423
-airbyte_cdk/sources/declarative/requesters/README.md,sha256=DQll2qsIzzTiiP35kJp16ONpr7cFeUQNgPfhl5krB24,2675
+airbyte_cdk/sources/declarative/requesters/README.md,sha256=D-3n-Hlf6IrouihdieY_lOWSZUeMwBFOlGRa_C0nJ3k,3450
 airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
 airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
 airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
@@ -196,7 +196,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
 airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
 airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
 airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=E-fQbt4ShfxZVoqfnmOx69C6FUPWZz8BIqI3DN9Kcjs,7935
-airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=yU61dCEX9961QMA6lt26c0bOM5MXp_iuxuSljwD_5Jo,14326
+airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=RLaQlvb-kL2FqnhuV3TJlJhSfnBSaDIgUgM81qFsCkY,15847
 airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=cktdjnOu-o98smdCdrWC361iWNamPTZ-csT32OFh00c,18920
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
 airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=kH_gwgULvEr8na-sST93gpr5pgqk8oT2aZNPebyMqlc,12293
@@ -457,9 +457,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=9YDJmnIGFsT51CVQf2tSSvTapGimITjEFGbUTSZAGTI,963
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-7.0.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-7.0.2.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
-airbyte_cdk-7.0.2.dist-info/METADATA,sha256=ANx0SZJHHk5j9u3LC17Fla3jB2_IjgT5sUNYd6_-4iA,6799
-airbyte_cdk-7.0.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_cdk-7.0.2.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
-airbyte_cdk-7.0.2.dist-info/RECORD,,
+airbyte_cdk-7.0.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-7.0.4.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
+airbyte_cdk-7.0.4.dist-info/METADATA,sha256=mPycx2yG98V8BkQJp9vwCLHAjACLczgUquxx-jgf7og,6799
+airbyte_cdk-7.0.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-7.0.4.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
+airbyte_cdk-7.0.4.dist-info/RECORD,,