airbyte-cdk 6.37.0.dev1__py3-none-any.whl → 6.37.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. airbyte_cdk/connector_builder/models.py +16 -14
  2. airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
  3. airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
  4. airbyte_cdk/connector_builder/test_reader/types.py +9 -1
  5. airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
  6. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -7
  7. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +67 -46
  9. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +13 -2
  10. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -0
  11. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +83 -17
  12. airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
  13. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +30 -45
  14. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
  15. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +171 -70
  16. airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
  17. airbyte_cdk/sources/declarative/requesters/README.md +5 -5
  18. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +60 -17
  19. airbyte_cdk/sources/declarative/requesters/http_requester.py +7 -1
  20. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +10 -3
  21. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
  22. airbyte_cdk/sources/http_logger.py +3 -0
  23. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
  24. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  25. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
  26. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/METADATA +2 -2
  27. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/RECORD +31 -31
  28. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -136
  29. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/LICENSE_SHORT +0 -0
  31. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/WHEEL +0 -0
  32. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -227,9 +227,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FlattenFields as FlattenFieldsModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    GroupingPartitionRouter as GroupingPartitionRouterModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     GzipDecoder as GzipDecoderModel,
 )
@@ -248,6 +245,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     HttpResponseFilter as HttpResponseFilterModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    IncrementingCountCursor as IncrementingCountCursorModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     InlineSchemaLoader as InlineSchemaLoaderModel,
 )
@@ -382,7 +382,6 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
 )
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
-    GroupingPartitionRouter,
     ListPartitionRouter,
     PartitionRouter,
     SinglePartitionRouter,
@@ -500,6 +499,9 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
     CustomFormatConcurrentStreamStateConverter,
     DateTimeStreamStateConverter,
 )
+from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
+    IncrementingCountStreamStateConverter,
+)
 from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
 from airbyte_cdk.sources.types import Config
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
@@ -588,6 +590,7 @@ class ModelToComponentFactory:
             FlattenFieldsModel: self.create_flatten_fields,
             DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
             IterableDecoderModel: self.create_iterable_decoder,
+            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
             XmlDecoderModel: self.create_xml_decoder,
             JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
             DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
@@ -628,7 +631,6 @@ class ModelToComponentFactory:
             UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
             RateModel: self.create_rate,
             HttpRequestRegexMatcherModel: self.create_http_request_matcher,
-            GroupingPartitionRouterModel: self.create_grouping_partition_router,
         }
 
         # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1194,6 +1196,70 @@ class ModelToComponentFactory:
             clamping_strategy=clamping_strategy,
         )
 
+    def create_concurrent_cursor_from_incrementing_count_cursor(
+        self,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        message_repository: Optional[MessageRepository] = None,
+        **kwargs: Any,
+    ) -> ConcurrentCursor:
+        # Per-partition incremental streams can dynamically create child cursors which will pass their current
+        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
+        # incoming state and connector_state_manager that is initialized when the component factory is created
+        stream_state = (
+            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
+            if "stream_state" not in kwargs
+            else kwargs["stream_state"]
+        )
+
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_start_value = (
+            InterpolatedString.create(
+                incrementing_count_cursor_model.start_value,  # type: ignore
+                parameters=incrementing_count_cursor_model.parameters or {},
+            )
+            if incrementing_count_cursor_model.start_value
+            else 0
+        )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            incrementing_count_cursor_model.cursor_field,
+            parameters=incrementing_count_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        connector_state_converter = IncrementingCountStreamStateConverter(
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
+        )
+
+        return ConcurrentCursor(
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=message_repository or self._message_repository,
+            connector_state_manager=self._connector_state_manager,
+            connector_state_converter=connector_state_converter,
+            cursor_field=cursor_field,
+            slice_boundary_fields=None,
+            start=interpolated_start_value,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+            end_provider=connector_state_converter.get_end_provider(),  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+        )
+
     def _assemble_weekday(self, weekday: str) -> Weekday:
         match weekday:
             case "MONDAY":
@@ -1627,6 +1693,31 @@ class ModelToComponentFactory:
                 config=config,
                 parameters=model.parameters or {},
             )
+        elif model.incremental_sync and isinstance(
+            model.incremental_sync, IncrementingCountCursorModel
+        ):
+            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
+
+            start_time_option = (
+                self._create_component_from_model(
+                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
+                    config,
+                    parameters=cursor_model.parameters or {},
+                )
+                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
+                else None
+            )
+
+            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
+            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
+            partition_field_start = "start"
+
+            request_options_provider = DatetimeBasedRequestOptionsProvider(
+                start_time_option=start_time_option,
+                partition_field_start=partition_field_start,
+                config=config,
+                parameters=model.parameters or {},
+            )
         else:
             request_options_provider = None
 
@@ -2096,10 +2187,10 @@ class ModelToComponentFactory:
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
         return JsonDecoder(parameters={})
 
-    @staticmethod
-    def create_csv_decoder(model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
+    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
         return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
+            parser=ModelToComponentFactory._get_parser(model, config),
+            stream_response=False if self._emit_connector_builder_messages else True,
         )
 
     @staticmethod
@@ -2108,10 +2199,28 @@ class ModelToComponentFactory:
             parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
         )
 
-    @staticmethod
-    def create_gzip_decoder(model: GzipDecoderModel, config: Config, **kwargs: Any) -> Decoder:
+    def create_gzip_decoder(
+        self, model: GzipDecoderModel, config: Config, **kwargs: Any
+    ) -> Decoder:
         return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
+            parser=ModelToComponentFactory._get_parser(model, config),
+            stream_response=False if self._emit_connector_builder_messages else True,
+        )
+
+    @staticmethod
+    def create_incrementing_count_cursor(
+        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
+    ) -> DatetimeBasedCursor:
+        # This should not actually get used anywhere at runtime, but needed to add this to pass checks since
+        # we still parse models into components. The issue is that there's no runtime implementation of a
+        # IncrementingCountCursor.
+        # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor.
+        return DatetimeBasedCursor(
+            cursor_field=model.cursor_field,
+            datetime_format="%Y-%m-%d",
+            start_datetime="2024-12-12",
+            config=config,
+            parameters={},
         )
 
     @staticmethod
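With `create_csv_decoder` and `create_gzip_decoder` now instance methods, streaming is disabled when the factory was built for the connector builder, presumably so the full response body stays available for the builder's request/response logging. A short sketch of the intent, assuming the factory's existing `emit_connector_builder_messages` constructor flag (the model construction below is illustrative):

```python
# Illustrative sketch; assumes the existing emit_connector_builder_messages flag.
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CsvDecoder as CsvDecoderModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

builder_factory = ModelToComponentFactory(emit_connector_builder_messages=True)
decoder = builder_factory.create_csv_decoder(CsvDecoderModel(type="CsvDecoder"), config={})
# In builder mode the response is buffered (stream_response=False);
# during a normal sync it is still streamed (stream_response=True).
```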
@@ -2632,6 +2741,47 @@ class ModelToComponentFactory:
         transformations: List[RecordTransformation],
         **kwargs: Any,
     ) -> AsyncRetriever:
+        def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
+            record_selector = RecordSelector(
+                extractor=download_extractor,
+                name=name,
+                record_filter=None,
+                transformations=transformations,
+                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
+                config=config,
+                parameters={},
+            )
+            paginator = (
+                self._create_component_from_model(
+                    model=model.download_paginator, decoder=decoder, config=config, url_base=""
+                )
+                if model.download_paginator
+                else NoPagination(parameters={})
+            )
+            maximum_number_of_slices = self._limit_slices_fetched or 5
+
+            if self._limit_slices_fetched or self._emit_connector_builder_messages:
+                return SimpleRetrieverTestReadDecorator(
+                    requester=download_requester,
+                    record_selector=record_selector,
+                    primary_key=None,
+                    name=job_download_components_name,
+                    paginator=paginator,
+                    config=config,
+                    parameters={},
+                    maximum_number_of_slices=maximum_number_of_slices,
+                )
+
+            return SimpleRetriever(
+                requester=download_requester,
+                record_selector=record_selector,
+                primary_key=None,
+                name=job_download_components_name,
+                paginator=paginator,
+                config=config,
+                parameters={},
+            )
+
         decoder = (
             self._create_component_from_model(model=model.decoder, config=config)
             if model.decoder
@@ -2685,29 +2835,7 @@ class ModelToComponentFactory:
             config=config,
             name=job_download_components_name,
         )
-        download_retriever = SimpleRetriever(
-            requester=download_requester,
-            record_selector=RecordSelector(
-                extractor=download_extractor,
-                name=name,
-                record_filter=None,
-                transformations=transformations,
-                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
-                config=config,
-                parameters={},
-            ),
-            primary_key=None,
-            name=job_download_components_name,
-            paginator=(
-                self._create_component_from_model(
-                    model=model.download_paginator, decoder=decoder, config=config, url_base=""
-                )
-                if model.download_paginator
-                else NoPagination(parameters={})
-            ),
-            config=config,
-            parameters={},
-        )
+        download_retriever = _get_download_retriever()
         abort_requester = (
             self._create_component_from_model(
                 model=model.abort_requester,
@@ -2728,32 +2856,32 @@ class ModelToComponentFactory:
             if model.delete_requester
             else None
         )
-        url_requester = (
+        download_target_requester = (
             self._create_component_from_model(
-                model=model.url_requester,
+                model=model.download_target_requester,
                 decoder=decoder,
                 config=config,
                 name=f"job extract_url - {name}",
             )
-            if model.url_requester
+            if model.download_target_requester
             else None
         )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
-        urls_extractor = self._create_component_from_model(
-            model=model.urls_extractor, decoder=decoder, config=config, name=name
+        download_target_extractor = self._create_component_from_model(
+            model=model.download_target_extractor, decoder=decoder, config=config, name=name
         )
         job_repository: AsyncJobRepository = AsyncHttpJobRepository(
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
-            url_requester=url_requester,
+            download_target_requester=download_target_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
             status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
-            urls_extractor=urls_extractor,
+            download_target_extractor=download_target_extractor,
         )
 
         async_job_partition_router = AsyncJobPartitionRouter(
@@ -3029,8 +3157,9 @@ class ModelToComponentFactory:
         )
 
     def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
+        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
         return Rate(
-            limit=model.limit,
+            limit=int(interpolated_limit.eval(config=config)),
            interval=parse_duration(model.interval),
         )
 
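`create_rate` now runs the declared limit through string interpolation before casting it to an int, so a manifest can source the call-rate limit from the connector config. A small illustrative sketch of the mechanism (the config key below is made up):

```python
# Illustrative: the limit string is resolved against the config, then cast to int.
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString

interpolated_limit = InterpolatedString.create("{{ config['requests_per_minute'] }}", parameters={})
limit = int(interpolated_limit.eval(config={"requests_per_minute": 10}))  # -> 10
```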
@@ -3049,31 +3178,3 @@ class ModelToComponentFactory:
         self._api_budget = self.create_component(
             model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
         )
-
-    def create_grouping_partition_router(
-        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
-    ) -> GroupingPartitionRouter:
-        underlying_router = self._create_component_from_model(
-            model=model.underlying_partition_router, config=config
-        )
-        if model.group_size < 1:
-            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
-
-        if not isinstance(underlying_router, PartitionRouter):
-            raise ValueError(
-                f"Underlying partition router must be a PartitionRouter subclass, got {type(underlying_router)}"
-            )
-
-        if isinstance(underlying_router, SubstreamPartitionRouter):
-            if any(
-                parent_config.request_option
-                for parent_config in underlying_router.parent_stream_configs
-            ):
-                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
-
-        return GroupingPartitionRouter(
-            group_size=model.group_size,
-            underlying_partition_router=underlying_router,
-            deduplicate=model.deduplicate if model.deduplicate is not None else True,
-            config=config,
-        )
airbyte_cdk/sources/declarative/partition_routers/__init__.py

@@ -8,9 +8,6 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_route
 from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
     CartesianProductStreamSlicer,
 )
-from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
-    GroupingPartitionRouter,
-)
 from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
     ListPartitionRouter,
 )
@@ -25,7 +22,6 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_route
 __all__ = [
     "AsyncJobPartitionRouter",
     "CartesianProductStreamSlicer",
-    "GroupingPartitionRouter",
     "ListPartitionRouter",
     "SinglePartitionRouter",
     "SubstreamPartitionRouter",
airbyte_cdk/sources/declarative/requesters/README.md

@@ -1,8 +1,8 @@
 # AsyncHttpJobRepository sequence diagram
 
 - Components marked as optional are not required and can be ignored.
-- if `url_requester` is not provided, `urls_extractor` will get urls from the `polling_job_response`
-- interpolation_context, e.g. `create_job_response` or `polling_job_response` can be obtained from stream_slice
+- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
+- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice
 
 ```mermaid
 ---
@@ -12,7 +12,7 @@ sequenceDiagram
     participant AsyncHttpJobRepository as AsyncOrchestrator
     participant CreationRequester as creation_requester
     participant PollingRequester as polling_requester
-    participant UrlRequester as url_requester (Optional)
+    participant UrlRequester as download_target_requester (Optional)
     participant DownloadRetriever as download_retriever
     participant AbortRequester as abort_requester (Optional)
     participant DeleteRequester as delete_requester (Optional)
@@ -25,14 +25,14 @@ sequenceDiagram
 
     loop Poll for job status
         AsyncHttpJobRepository ->> PollingRequester: Check job status
-        PollingRequester ->> Reporting Server: Status request (interpolation_context: `create_job_response`)
+        PollingRequester ->> Reporting Server: Status request (interpolation_context: `creation_response`)
         Reporting Server -->> PollingRequester: Status response
         PollingRequester -->> AsyncHttpJobRepository: Job status
     end
 
     alt Status: Ready
         AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
-        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_job_response`)
+        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_response`)
         Reporting Server -->> UrlRequester: Download URLs
         UrlRequester -->> AsyncHttpJobRepository: Download URLs
 
airbyte_cdk/sources/declarative/requesters/http_job_repository.py

@@ -23,6 +23,7 @@ from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor impor
 )
 from airbyte_cdk.sources.declarative.requesters.requester import Requester
 from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
+from airbyte_cdk.sources.http_logger import format_http_message
 from airbyte_cdk.sources.types import Record, StreamSlice
 from airbyte_cdk.utils import AirbyteTracedException
 
@@ -42,13 +43,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     delete_requester: Optional[Requester]
     status_extractor: DpathExtractor
     status_mapping: Mapping[str, AsyncJobStatus]
-    urls_extractor: DpathExtractor
+    download_target_extractor: DpathExtractor
 
     job_timeout: Optional[timedelta] = None
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
-    url_requester: Optional[Requester] = (
+    download_target_requester: Optional[Requester] = (
         None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
     )
 
@@ -71,7 +72,15 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         """
 
         polling_response: Optional[requests.Response] = self.polling_requester.send_request(
-            stream_slice=stream_slice
+            stream_slice=stream_slice,
+            log_formatter=lambda polling_response: format_http_message(
+                response=polling_response,
+                title="Async Job -- Polling",
+                description="Poll the status of the server-side async job.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_POLL",
+            ),
         )
         if polling_response is None:
             raise AirbyteTracedException(
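The creation, polling, abort, and delete requests now pass a `log_formatter`, so the emitted HTTP log messages are tagged as auxiliary async-job traffic. The same formatter as in the hunk above, written as a named function for readability:

```python
# Mirrors the lambda used in the polling request above.
import requests

from airbyte_cdk.sources.http_logger import format_http_message


def _polling_log_formatter(polling_response: requests.Response):
    return format_http_message(
        response=polling_response,
        title="Async Job -- Polling",
        description="Poll the status of the server-side async job.",
        stream_name=None,
        is_auxiliary=True,
        type="ASYNC_POLL",
    )
```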
@@ -118,8 +127,17 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         """
 
         response: Optional[requests.Response] = self.creation_requester.send_request(
-            stream_slice=stream_slice
+            stream_slice=stream_slice,
+            log_formatter=lambda response: format_http_message(
+                response=response,
+                title="Async Job -- Create",
+                description="Create the server-side async job.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_CREATE",
+            ),
         )
+
         if not response:
             raise AirbyteTracedException(
                 internal_message="Always expect a response or an exception from creation_requester",
@@ -193,12 +211,15 @@ class AsyncHttpJobRepository(AsyncJobRepository):
 
         """
 
-        for url in self._get_download_url(job):
+        for target_url in self._get_download_targets(job):
             job_slice = job.job_parameters()
             stream_slice = StreamSlice(
                 partition=job_slice.partition,
                 cursor_slice=job_slice.cursor_slice,
-                extra_fields={**job_slice.extra_fields, "url": url},
+                extra_fields={
+                    **job_slice.extra_fields,
+                    "download_target": target_url,
+                },
             )
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
@@ -217,13 +238,33 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         if not self.abort_requester:
             return
 
-        self.abort_requester.send_request(stream_slice=self._get_create_job_stream_slice(job))
+        abort_response = self.abort_requester.send_request(
+            stream_slice=self._get_create_job_stream_slice(job),
+            log_formatter=lambda abort_response: format_http_message(
+                response=abort_response,
+                title="Async Job -- Abort",
+                description="Abort the running server-side async job.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_ABORT",
+            ),
+        )
 
     def delete(self, job: AsyncJob) -> None:
         if not self.delete_requester:
             return
 
-        self.delete_requester.send_request(stream_slice=self._get_create_job_stream_slice(job))
+        delete_job_reponse = self.delete_requester.send_request(
+            stream_slice=self._get_create_job_stream_slice(job),
+            log_formatter=lambda delete_job_reponse: format_http_message(
+                response=delete_job_reponse,
+                title="Async Job -- Delete",
+                description="Delete the specified job from the list of Jobs.",
+                stream_name=None,
+                is_auxiliary=True,
+                type="ASYNC_DELETE",
+            ),
+        )
         self._clean_up_job(job.api_job_id())
 
     def _clean_up_job(self, job_id: str) -> None:
@@ -231,27 +272,29 @@ class AsyncHttpJobRepository(AsyncJobRepository):
         del self._polling_job_response_by_id[job_id]
 
     def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
+        creation_response = self._create_job_response_by_id[job.api_job_id()].json()
         stream_slice = StreamSlice(
-            partition={"create_job_response": self._create_job_response_by_id[job.api_job_id()]},
+            partition={},
             cursor_slice={},
+            extra_fields={"creation_response": creation_response},
         )
         return stream_slice
 
-    def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
-        if not self.url_requester:
+    def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
+        if not self.download_target_requester:
             url_response = self._polling_job_response_by_id[job.api_job_id()]
         else:
+            polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
             stream_slice: StreamSlice = StreamSlice(
-                partition={
-                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
-                },
+                partition={},
                 cursor_slice={},
+                extra_fields={"polling_response": polling_response},
             )
-            url_response = self.url_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect url_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
+            url_response = self.download_target_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
             if not url_response:
                 raise AirbyteTracedException(
-                    internal_message="Always expect a response or an exception from url_requester",
+                    internal_message="Always expect a response or an exception from download_target_requester",
                     failure_type=FailureType.system_error,
                 )
 
-        yield from self.urls_extractor.extract_records(url_response)  # type: ignore # we expect urls_extractor to always return list of strings
+        yield from self.download_target_extractor.extract_records(url_response)  # type: ignore # we expect download_target_extractor to always return list of strings
airbyte_cdk/sources/declarative/requesters/http_requester.py

@@ -85,7 +85,7 @@ class HttpRequester(Requester):
         self._parameters = parameters
 
         if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
-            backoff_strategies = self.error_handler.backoff_strategies
+            backoff_strategies = self.error_handler.backoff_strategies  # type: ignore
         else:
             backoff_strategies = None
 
@@ -125,6 +125,12 @@
         kwargs = {
             "stream_slice": stream_slice,
             "next_page_token": next_page_token,
+            # update the interpolation context with extra fields, if passed.
+            **(
+                stream_slice.extra_fields
+                if stream_slice is not None and hasattr(stream_slice, "extra_fields")
+                else {}
+            ),
         }
         path = str(self._path.eval(self.config, **kwargs))
         return path.lstrip("/")
airbyte_cdk/sources/declarative/retrievers/async_retriever.py

@@ -1,13 +1,12 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 
 
-from dataclasses import InitVar, dataclass
+from dataclasses import InitVar, dataclass, field
 from typing import Any, Iterable, Mapping, Optional
 
 from typing_extensions import deprecated
 
 from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
-from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncPartition
 from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
 from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
     AsyncJobPartitionRouter,
@@ -16,6 +15,7 @@ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.source import ExperimentalClassWarning
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger
 
 
 @deprecated(
@@ -28,6 +28,10 @@ class AsyncRetriever(Retriever):
     parameters: InitVar[Mapping[str, Any]]
     record_selector: RecordSelector
     stream_slicer: AsyncJobPartitionRouter
+    slice_logger: AlwaysLogSliceLogger = field(
+        init=False,
+        default_factory=lambda: AlwaysLogSliceLogger(),
+    )
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
@@ -75,13 +79,16 @@
         return stream_slice.extra_fields.get("jobs", []) if stream_slice else []
 
     def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
-        return self.stream_slicer.stream_slices()
+        yield from self.stream_slicer.stream_slices()
 
     def read_records(
         self,
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice] = None,
     ) -> Iterable[StreamData]:
+        # emit the slice_descriptor log message, for connector builder TestRead
+        yield self.slice_logger.create_slice_log_message(stream_slice.cursor_slice)  # type: ignore
+
         stream_state: StreamState = self._get_stream_state()
         jobs: Iterable[AsyncJob] = self._validate_and_get_stream_slice_jobs(stream_slice)
         records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(jobs)
airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py

@@ -6,7 +6,7 @@ import re
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 
-import unidecode
+import anyascii
 
 from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -48,7 +48,7 @@ class KeysToSnakeCaseTransformation(RecordTransformation):
         return self.tokens_to_snake_case(tokens)
 
     def normalize_key(self, key: str) -> str:
-        return unidecode.unidecode(key)
+        return str(anyascii.anyascii(key))
 
     def tokenize_key(self, key: str) -> List[str]:
         tokens = []
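The transliteration dependency moves from `unidecode` to `anyascii`; `normalize_key` keeps the same contract of mapping record keys to ASCII before tokenization. A quick illustrative check (the example string is arbitrary):

```python
import anyascii

print(anyascii.anyascii("Käse-Größe"))  # ASCII-only output, e.g. "Kase-Grosse"
```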
airbyte_cdk/sources/http_logger.py

@@ -15,11 +15,14 @@ def format_http_message(
     description: str,
     stream_name: Optional[str],
     is_auxiliary: bool | None = None,
+    type: Optional[str] = None,
 ) -> LogMessage:
+    request_type: str = type if type else "HTTP"
     request = response.request
     log_message = {
         "http": {
             "title": title,
+            "type": request_type,
             "description": description,
             "request": {
                 "method": request.method,