airbyte-cdk 0.52.7__py3-none-any.whl → 0.52.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. airbyte_cdk/destinations/vector_db_based/config.py +1 -0
  2. airbyte_cdk/sources/abstract_source.py +12 -61
  3. airbyte_cdk/sources/message/repository.py +0 -6
  4. airbyte_cdk/sources/source.py +14 -13
  5. airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
  6. airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
  7. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
  8. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
  9. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
  10. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
  11. airbyte_cdk/sources/streams/core.py +71 -1
  12. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +27 -25
  14. unit_tests/sources/message/test_repository.py +7 -20
  15. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
  16. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
  17. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
  18. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
  19. unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
  20. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
  21. unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
  22. unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
  23. unit_tests/sources/streams/test_stream_read.py +3 -1
  24. unit_tests/sources/test_abstract_source.py +12 -9
  25. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
  26. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +0 -0
  27. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -12,9 +12,10 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
12
12
 
13
13
  import airbyte_cdk.sources.utils.casing as casing
14
14
  from airbyte_cdk.models import AirbyteMessage, AirbyteStream, SyncMode
15
+ from airbyte_cdk.models import Type as MessageType
15
16
 
16
17
  # list of all possible HTTP methods which can be used for sending of request bodies
17
- from airbyte_cdk.sources.utils.schema_helpers import ResourceSchemaLoader
18
+ from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
18
19
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
19
20
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
20
21
  from deprecated.classic import deprecated
@@ -123,6 +124,57 @@ class Stream(ABC):
123
124
  cursor_field=cursor_field,
124
125
  )
125
126
 
127
+ def read_incremental( # type: ignore # ignoring typing for ConnectorStateManager because of circular dependencies
128
+ self,
129
+ cursor_field: Optional[List[str]],
130
+ logger: logging.Logger,
131
+ slice_logger: SliceLogger,
132
+ stream_state: MutableMapping[str, Any],
133
+ state_manager,
134
+ per_stream_state_enabled: bool,
135
+ internal_config: InternalConfig,
136
+ ) -> Iterable[StreamData]:
137
+ slices = self.stream_slices(
138
+ cursor_field=cursor_field,
139
+ sync_mode=SyncMode.incremental,
140
+ stream_state=stream_state,
141
+ )
142
+ logger.debug(f"Processing stream slices for {self.name} (sync_mode: incremental)", extra={"stream_slices": slices})
143
+
144
+ has_slices = False
145
+ record_counter = 0
146
+ for _slice in slices:
147
+ has_slices = True
148
+ if slice_logger.should_log_slice_message(logger):
149
+ yield slice_logger.create_slice_log_message(_slice)
150
+ records = self.read_records(
151
+ sync_mode=SyncMode.incremental,
152
+ stream_slice=_slice,
153
+ stream_state=stream_state,
154
+ cursor_field=cursor_field or None,
155
+ )
156
+ for record_data_or_message in records:
157
+ yield record_data_or_message
158
+ if isinstance(record_data_or_message, Mapping) or (
159
+ hasattr(record_data_or_message, "type") and record_data_or_message.type == MessageType.RECORD
160
+ ):
161
+ record_data = record_data_or_message if isinstance(record_data_or_message, Mapping) else record_data_or_message.record
162
+ stream_state = self.get_updated_state(stream_state, record_data)
163
+ checkpoint_interval = self.state_checkpoint_interval
164
+ record_counter += 1
165
+ if checkpoint_interval and record_counter % checkpoint_interval == 0:
166
+ yield self._checkpoint_state(stream_state, state_manager, per_stream_state_enabled)
167
+
168
+ if internal_config.is_limit_reached(record_counter):
169
+ break
170
+
171
+ yield self._checkpoint_state(stream_state, state_manager, per_stream_state_enabled)
172
+
173
+ if not has_slices:
174
+ # Safety net to ensure we always emit at least one state message even if there are no slices
175
+ checkpoint = self._checkpoint_state(stream_state, state_manager, per_stream_state_enabled)
176
+ yield checkpoint
177
+
126
178
  @abstractmethod
127
179
  def read_records(
128
180
  self,
@@ -304,3 +356,21 @@ class Stream(ABC):
304
356
  return wrapped_keys
305
357
  else:
306
358
  raise ValueError(f"Element must be either list or str. Got: {type(keys)}")
359
+
360
+ def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateManager because of circular dependencies
361
+ self,
362
+ stream_state: Mapping[str, Any],
363
+ state_manager,
364
+ per_stream_state_enabled: bool,
365
+ ) -> AirbyteMessage:
366
+ # First attempt to retrieve the current state using the stream's state property. We receive an AttributeError if the state
367
+ # property is not implemented by the stream instance and as a fallback, use the stream_state retrieved from the stream
368
+ # instance's deprecated get_updated_state() method.
369
+ try:
370
+ state_manager.update_state_for_stream(
371
+ self.name, self.namespace, self.state # type: ignore # we know the field might not exist...
372
+ )
373
+
374
+ except AttributeError:
375
+ state_manager.update_state_for_stream(self.name, self.namespace, stream_state)
376
+ return state_manager.create_state_message(self.name, self.namespace, send_per_stream_state=per_stream_state_enabled)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.52.7
3
+ Version: 0.52.8
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -13,7 +13,7 @@ airbyte_cdk/connector_builder/models.py,sha256=U2LrL1syxZ0gQ3LgnwVj9ozL6uGH5f9bi
13
13
  airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
14
14
  airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
15
15
  airbyte_cdk/destinations/vector_db_based/__init__.py,sha256=eAkzwTjBbXBhJ5GfPO5I53Zgpv5xQFLRQS8n4nuyPt0,1006
16
- airbyte_cdk/destinations/vector_db_based/config.py,sha256=HXzXnItahHfpyzQzwcNrypW8mmzRsJTNd4v1GB-z6TU,9543
16
+ airbyte_cdk/destinations/vector_db_based/config.py,sha256=tMp8blgdrI4t7a9Ri9Vydk0TOcRqLTHHUjVlXtc0Wa4,9562
17
17
  airbyte_cdk/destinations/vector_db_based/document_processor.py,sha256=ldrlmCT4gFHc_A5B_um4OteXg1OR0LGyDmswO1316tA,8649
18
18
  airbyte_cdk/destinations/vector_db_based/embedder.py,sha256=davAE4UtrpWDjbV74tck5zvKksxizvSdF9X51WFMbW4,10913
19
19
  airbyte_cdk/destinations/vector_db_based/indexer.py,sha256=58Uf34yIe0QHbnpbkS7rH2sqL7eLzwWUjx7X4yciyeA,3165
@@ -24,11 +24,11 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
24
24
  airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
25
25
  airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
26
26
  airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
27
- airbyte_cdk/sources/abstract_source.py,sha256=0YvschwQsz3EgVaMw-WlV3WcM12_VF72MKRXaCSxaz4,16428
27
+ airbyte_cdk/sources/abstract_source.py,sha256=iw4Y5tZtyXsY0XDVPNiQiAZBp6O_zyqRMsOEhu7I7lo,13375
28
28
  airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
29
29
  airbyte_cdk/sources/connector_state_manager.py,sha256=wsmUgII398MazCTKxwLBLzeiU6Z-tMTrKX882EEy-YE,10904
30
30
  airbyte_cdk/sources/http_logger.py,sha256=v0kkpDtA0GUOgj6_3AayrYaBrSHBqG4t3MGbrtxaNmU,1437
31
- airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
31
+ airbyte_cdk/sources/source.py,sha256=dk50z8Roc28MJ8FxWe652B-GwItO__bTZqFm7WOtHnw,4412
32
32
  airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
33
33
  airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
34
34
  airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=LtLvEpzKo86RzMO6n20-z4ECW6P0Yoi26HXRCSLP9K0,85049
@@ -181,24 +181,25 @@ airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8
181
181
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
182
182
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=kuJRKgDYOGXRk0V0I8BpFxg0hGv7SfV_nBpmmn45F88,6815
183
183
  airbyte_cdk/sources/message/__init__.py,sha256=14ZSLah9uyI_CyK7_jIyq521vlgKAdihe6Ciw6-jLgU,372
184
- airbyte_cdk/sources/message/repository.py,sha256=kflbIkUwCWXMKpe6566TD_HRjRqEZKQ0h2RpxzjWLJk,4994
184
+ airbyte_cdk/sources/message/repository.py,sha256=tQOmtWxrAp1CMiOKi5SdIEWzcmgnCUYd-xL3fcupUT4,4583
185
185
  airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
186
186
  airbyte_cdk/sources/singer/singer_helpers.py,sha256=q1LmgjFxSnN-dobMy7nikUwcK-9FvW5QQfgTqiclbAE,15649
187
187
  airbyte_cdk/sources/singer/source.py,sha256=3YY8UTOXmctvMVUnYmIegmL3_IxF55iGP_bc_s2MZdY,8530
188
188
  airbyte_cdk/sources/streams/__init__.py,sha256=IztrWN5IU_N5GCKDyRSEuoWdZohFTcgIbAIkaCFkr_Q,176
189
189
  airbyte_cdk/sources/streams/availability_strategy.py,sha256=7BM0qLvXS0QrlKvnVkBEw4Cw8i7PCENCBLcIAcuD3nY,1007
190
- airbyte_cdk/sources/streams/core.py,sha256=3tnL0JBtzasuGER_kTY7CR7soSgChDvjCuPuw241HOM,12442
190
+ airbyte_cdk/sources/streams/core.py,sha256=bIuQV7Zs9JpIyNDcfPCbyzv-BWDr_2ictK7s5AihLZQ,16025
191
191
  airbyte_cdk/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
192
192
  airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=GCcRvUixoDOkNPy0vK37xdGxYaOfZXxVH6InzndduaE,3525
193
- airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=A3xoPEBTG8Y1GMrts1ehhxt6jlbECr5UoNzko6uGFis,14432
193
+ airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=yYpmVHwRkanyz1Pfm1dbZt_Q93pGnY8cmVKDLwiFTBM,17325
194
194
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=8xDRpfktnARBbRi_RwznvKuoGrpPF2b6tQyloMwogkM,2013
195
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=z9xuKGHadIOvVA67_SzlmfkQgkaudO2Te2k8Gi9OibY,6374
195
196
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=-WETGIY5_QFmVeDFiqm4WhRJ_nNCkfcDwOQqx6cSqrI,365
196
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=DFDgZD_SKOcRwm634BOp2wMW37S0S1Z3JcNvJFXnl38,1682
197
+ airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=Xg0yeH2-aYTOvSXZrDbzGiacNr4_ARBpSjxkzdKvGoU,1602
197
198
  airbyte_cdk/sources/streams/concurrent/partition_reader.py,sha256=H8sGVVGx6uKMSUehRaqmVbE19DE3cx3NivQ4sFj8wbk,1303
198
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py,sha256=Qk4zs-Zho4wBqCvVtK6stw8Y79dKFmKA8FpuEkQDqyg,9811
199
+ airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py,sha256=7DRmqrnWVWwout-MR6hBb1bIP5h5H9tU8pzXkYm3EKA,10024
199
200
  airbyte_cdk/sources/streams/concurrent/partitions/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
200
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py,sha256=r0vl7IgoOnsvWNk3_-VtDWjLYm_BPQ7tmpYmXgXLuGA,977
201
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py,sha256=6ZLDxm2LBKmC5HD_6wpgFAEH12CkOr1Mn3azIGwYfdM,537
201
+ airbyte_cdk/sources/streams/concurrent/partitions/partition.py,sha256=tjXF8lZMvyfZaCYCHr5aTPwbVstmRjYZDwYAvLDY-ds,1312
202
+ airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py,sha256=_ymkkBr71_qt1fW0_MUqw96OfNBkeJngXQ09yolEDHw,441
202
203
  airbyte_cdk/sources/streams/concurrent/partitions/record.py,sha256=c87pzwl18pq1_3XLoKDXH_WwrskjbBnTGkxrF4uU5-A,469
203
204
  airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=uc3aBg2kbp3mZry3RtmAwtFExKG2oQw2qG12tZWY514,849
204
205
  airbyte_cdk/sources/streams/http/__init__.py,sha256=cTP2d7Wf0hYXaN20U0dtxKa1pFZ9rI-lBbkQ0UM1apQ,261
@@ -250,7 +251,7 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
250
251
  unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
251
252
  unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
252
253
  unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
253
- unit_tests/sources/test_abstract_source.py,sha256=6oo0lSfy4oKsagj-8KXGDQB8fuMvN1j9n7bXlAGCIsE,47759
254
+ unit_tests/sources/test_abstract_source.py,sha256=V7zSpOk-jqfOz8FtnImAo_zDe-Q2TjPqD_l_T0QaiDw,48179
254
255
  unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
255
256
  unit_tests/sources/test_connector_state_manager.py,sha256=ynFxA63Cxe6t-wMMh9C6ByTlMAuk8W7H2FikDhnUEQ0,24264
256
257
  unit_tests/sources/test_http_logger.py,sha256=VT6DqgspI3DcRnoBQkkQX0z4dF_AOiYZ5P_zxmMW8oU,9004
@@ -370,22 +371,23 @@ unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=An
370
371
  unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
371
372
  unit_tests/sources/fixtures/source_test_fixture.py,sha256=dvpISgio2sOp-U3bXudH_49vY4c68sO_PMs1JZTMaj0,5502
372
373
  unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
373
- unit_tests/sources/message/test_repository.py,sha256=zORo6lLj4L6QPpvUX4JW9XyiLSJhouFKmIzEX4Brm4M,7197
374
+ unit_tests/sources/message/test_repository.py,sha256=oiScwg4cAdnYDl7PPN1nZniDGpALz9JTPaRAuU2HD7g,6916
374
375
  unit_tests/sources/streams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
375
376
  unit_tests/sources/streams/test_availability_strategy.py,sha256=vJrSEk9NwRghu0YsSNoMYHKWzA9UFemwyClpke8Mk2s,2315
376
- unit_tests/sources/streams/test_stream_read.py,sha256=yzVfbVdHR7jc-_4-BfA8ySNogdYqapnUayBWuPjg1j0,6768
377
+ unit_tests/sources/streams/test_stream_read.py,sha256=so1M-vf8HxSGq-MVrMymtCvK0abdaT_X9AfV9i0uVm0,6879
377
378
  unit_tests/sources/streams/test_streams_core.py,sha256=YOC7XqWFJ13Z4YuO9Nh4AR4AwpJ-s111vqPplFfpxk4,5059
378
379
  unit_tests/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
379
- unit_tests/sources/streams/concurrent/test_adapters.py,sha256=gTfNEDV9ZSiH9vzyuXJRQN2fvBdwZECVyczw_B8tvPc,13027
380
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py,sha256=HHaSM8PoqW-46sIjW-v3NE9iQyIKgjwcQ9nscjwsiko,1260
380
+ unit_tests/sources/streams/concurrent/test_adapters.py,sha256=v8_tv2GCUk73DyoiPEPFDBrTRCsXysR-Cw-DXkQnPn4,14743
381
+ unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py,sha256=6ai_6AeRuiUFB0p5TcFGHMG2eiGFbGrmXhI41Oe5XYQ,1321
382
+ unit_tests/sources/streams/concurrent/test_cursor.py,sha256=vtKTMR4DGDKftaSny6ioKohX9bnT-mQsg_YGwkvlvwE,5107
381
383
  unit_tests/sources/streams/concurrent/test_partition_reader.py,sha256=eM5dzfmLKm9Lj-BfQUjAZRhCZzfvhk7AkKpcHGcoPfg,931
382
- unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py,sha256=XM8KKOLp-yZcU2rwd8qUXKGXXxbmAaIl_P6zzxwprG0,10048
384
+ unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py,sha256=TRWhtcJ56kIgMPRST46YCERC_DVRPkAbAzVVDZgX2bI,10285
383
385
  unit_tests/sources/streams/concurrent/scenarios/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
384
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=8Wb4a0XglscnmeCyDHPGTwlX3y7mQU57EAk0q7AIoOg,2442
385
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=_1lu7H8EdE7edC7XunhKMsGx6V9KzBCkyEF0B-hRSdI,11851
386
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=C6rADQ3DwfwyGYP463hd46aBlJ7C3TQ8Bl6CyAvGSVc,2601
386
+ unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=XLPC5Wqr-aBIc1tPahLKBnPHbHFw5q9rWkWCewmMeN8,4080
387
+ unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=XqAvndFw4i3s0Kky1AFy_asA6VDUVSj-se6rEqwf94s,15820
388
+ unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=yIIyEHn2jLkgryHTq9K5TSRTMit0ynIhjhMj0hKFItM,2961
387
389
  unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py,sha256=kiZ6VvQywg073FtrpP9AD3yIwSKbalVyfOM2Ksu3sZI,13554
388
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py,sha256=mnI799feDVAkDMjG-Qru8irUQECh6P2tMA9Kk-DHrW0,4732
390
+ unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py,sha256=G4Em5zfAd9ExzDaD-86nabxWHj9wn5HT3Mfz37UNiME,5310
389
391
  unit_tests/sources/streams/http/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
390
392
  unit_tests/sources/streams/http/test_availability_strategy.py,sha256=kuQJ5FIc4lffpHmEUVzvoN1QXQzvz8WEkFvzHItiipg,6063
391
393
  unit_tests/sources/streams/http/test_http.py,sha256=Zo5B_PPuyoCd6Cv3q8ISCxUQCwM3rFqMM-CS6TqMvRs,24746
@@ -401,8 +403,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
401
403
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
402
404
  unit_tests/utils/test_stream_status_utils.py,sha256=N2TxwKge45RHUKFlPcP2o5jXYjJPKMKiu6Fm2_leZYY,3388
403
405
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
404
- airbyte_cdk-0.52.7.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
405
- airbyte_cdk-0.52.7.dist-info/METADATA,sha256=t9RNmgqP5HbmlaCqOyOXT2-l7UGke73oMmP_4QUmWa0,11945
406
- airbyte_cdk-0.52.7.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
407
- airbyte_cdk-0.52.7.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
408
- airbyte_cdk-0.52.7.dist-info/RECORD,,
406
+ airbyte_cdk-0.52.8.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
407
+ airbyte_cdk-0.52.8.dist-info/METADATA,sha256=dLyy4YLIXG8SQk9Z07o05xWLhnRBCIaqC52tFodjflg,11945
408
+ airbyte_cdk-0.52.8.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
409
+ airbyte_cdk-0.52.8.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
410
+ airbyte_cdk-0.52.8.dist-info/RECORD,,
@@ -5,15 +5,7 @@
5
5
  from unittest.mock import Mock
6
6
 
7
7
  import pytest
8
- from airbyte_cdk.models import (
9
- AirbyteControlConnectorConfigMessage,
10
- AirbyteControlMessage,
11
- AirbyteMessage,
12
- AirbyteStateMessage,
13
- Level,
14
- OrchestratorType,
15
- Type,
16
- )
8
+ from airbyte_cdk.models import AirbyteControlConnectorConfigMessage, AirbyteControlMessage, AirbyteMessage, Level, OrchestratorType, Type
17
9
  from airbyte_cdk.sources.message import (
18
10
  InMemoryMessageRepository,
19
11
  LogAppenderMessageRepositoryDecorator,
@@ -74,14 +66,9 @@ class TestInMemoryMessageRepository:
74
66
  second_message_generator = repo.consume_queue()
75
67
  assert list(second_message_generator) == [second_message]
76
68
 
77
- def test_given_message_is_not_control_nor_log_message_when_emit_message_then_raise_error(self):
78
- repo = InMemoryMessageRepository()
79
- with pytest.raises(ValueError):
80
- repo.emit_message(AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data={"state": "state value"})))
81
-
82
69
  def test_given_log_level_is_severe_enough_when_log_message_then_allow_message_to_be_consumed(self):
83
70
  repo = InMemoryMessageRepository(Level.DEBUG)
84
- repo.log_message(Level.INFO, lambda: "this is a log message")
71
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
85
72
  assert list(repo.consume_queue())
86
73
 
87
74
  def test_given_log_level_is_severe_enough_when_log_message_then_filter_secrets(self, mocker):
@@ -89,18 +76,18 @@ class TestInMemoryMessageRepository:
89
76
  mocker.patch("airbyte_cdk.sources.message.repository.filter_secrets", return_value=filtered_message)
90
77
  repo = InMemoryMessageRepository(Level.DEBUG)
91
78
 
92
- repo.log_message(Level.INFO, lambda: "this is a log message")
79
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
93
80
 
94
81
  assert list(repo.consume_queue())[0].log.message == filtered_message
95
82
 
96
83
  def test_given_log_level_not_severe_enough_when_log_message_then_do_not_allow_message_to_be_consumed(self):
97
84
  repo = InMemoryMessageRepository(Level.ERROR)
98
- repo.log_message(Level.INFO, lambda: "this is a log message")
85
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
99
86
  assert not list(repo.consume_queue())
100
87
 
101
88
  def test_given_unknown_log_level_as_threshold_when_log_message_then_allow_message_to_be_consumed(self):
102
89
  repo = InMemoryMessageRepository(UNKNOWN_LEVEL)
103
- repo.log_message(Level.DEBUG, lambda: "this is a log message")
90
+ repo.log_message(Level.DEBUG, lambda: {"message": "this is a log message"})
104
91
  assert list(repo.consume_queue())
105
92
 
106
93
  def test_given_unknown_log_level_for_log_when_log_message_then_raise_error(self):
@@ -109,14 +96,14 @@ class TestInMemoryMessageRepository:
109
96
  """
110
97
  repo = InMemoryMessageRepository(Level.ERROR)
111
98
  with pytest.raises(ValidationError):
112
- repo.log_message(UNKNOWN_LEVEL, lambda: "this is a log message")
99
+ repo.log_message(UNKNOWN_LEVEL, lambda: {"message": "this is a log message"})
113
100
 
114
101
 
115
102
  class TestNoopMessageRepository:
116
103
  def test_given_message_emitted_when_consume_queue_then_return_empty(self):
117
104
  repo = NoopMessageRepository()
118
105
  repo.emit_message(AirbyteMessage(type=Type.CONTROL, control=A_CONTROL))
119
- repo.log_message(Level.INFO, lambda: "this is a log message")
106
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
120
107
 
121
108
  assert not list(repo.consume_queue())
122
109
 
@@ -6,27 +6,61 @@ from typing import Any, List, Mapping, Optional, Tuple, Union
6
6
 
7
7
  from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
8
8
  from airbyte_cdk.sources import AbstractSource
9
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
9
10
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
10
11
  from airbyte_cdk.sources.streams import Stream
11
12
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
13
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
12
14
  from airbyte_protocol.models import ConfiguredAirbyteStream
13
15
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
14
16
 
17
+ _NO_STATE = None
18
+
15
19
 
16
20
  class StreamFacadeSource(AbstractSource):
17
- def __init__(self, streams: List[Stream], max_workers: int):
21
+ def __init__(
22
+ self,
23
+ streams: List[Stream],
24
+ max_workers: int,
25
+ cursor_field: Optional[CursorField] = None,
26
+ cursor_boundaries: Optional[Tuple[str, str]] = None,
27
+ ):
18
28
  self._streams = streams
19
29
  self._max_workers = max_workers
30
+ self._message_repository = InMemoryMessageRepository()
31
+ self._cursor_field = cursor_field
32
+ self._cursor_boundaries = cursor_boundaries
20
33
 
21
34
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
22
35
  return True, None
23
36
 
24
37
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
25
- return [StreamFacade.create_from_stream(stream, self, stream.logger, self._max_workers) for stream in self._streams]
38
+ state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=_NO_STATE)
39
+ return [
40
+ StreamFacade.create_from_stream(
41
+ stream,
42
+ self,
43
+ stream.logger,
44
+ self._max_workers,
45
+ _NO_STATE,
46
+ ConcurrentCursor(
47
+ stream.name,
48
+ stream.namespace,
49
+ _NO_STATE,
50
+ self.message_repository, # type: ignore # for this source specifically, we always return `InMemoryMessageRepository`
51
+ state_manager,
52
+ self._cursor_field,
53
+ self._cursor_boundaries,
54
+ )
55
+ if self._cursor_field
56
+ else NoopCursor(),
57
+ )
58
+ for stream in self._streams
59
+ ]
26
60
 
27
61
  @property
28
62
  def message_repository(self) -> Union[None, MessageRepository]:
29
- return InMemoryMessageRepository()
63
+ return self._message_repository
30
64
 
31
65
  def spec(self, logger: logging.Logger) -> ConnectorSpecification:
32
66
  return ConnectorSpecification(connectionSpecification={})
@@ -49,14 +83,21 @@ class StreamFacadeSourceBuilder(SourceBuilder[StreamFacadeSource]):
49
83
  self._source = None
50
84
  self._streams = []
51
85
  self._max_workers = 1
86
+ self._cursor_field = None
87
+ self._cursor_boundaries = None
52
88
 
53
89
  def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder":
54
90
  self._streams = streams
55
91
  return self
56
92
 
57
- def set_max_workers(self, max_workers: int):
93
+ def set_max_workers(self, max_workers: int) -> "StreamFacadeSourceBuilder":
58
94
  self._max_workers = max_workers
59
95
  return self
60
96
 
97
+ def set_incremental(self, cursor_field: CursorField, cursor_boundaries: Optional[Tuple[str, str]]) -> "StreamFacadeSourceBuilder":
98
+ self._cursor_field = cursor_field
99
+ self._cursor_boundaries = cursor_boundaries
100
+ return self
101
+
61
102
  def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> StreamFacadeSource:
62
- return StreamFacadeSource(self._streams, self._max_workers)
103
+ return StreamFacadeSource(self._streams, self._max_workers, self._cursor_field, self._cursor_boundaries)
@@ -1,26 +1,25 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
- from typing import Any, Iterable, List, Mapping, Optional, Union
4
+ from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
5
5
 
6
6
  from airbyte_cdk.models import SyncMode
7
7
  from airbyte_cdk.sources.streams import Stream
8
+ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
8
9
  from airbyte_cdk.sources.streams.core import StreamData
9
- from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
10
+ from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
10
11
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder
11
12
 
12
13
 
13
14
  class _MockStream(Stream):
14
15
  def __init__(
15
16
  self,
16
- slice_key,
17
- slice_values_to_records_or_exception: Mapping[Optional[str], List[Union[Mapping[str, Any], Exception]]],
17
+ slices_and_records_or_exception: Iterable[Tuple[Optional[Mapping[str, Any]], Iterable[Union[Exception, Mapping[str, Any]]]]],
18
18
  name,
19
19
  json_schema,
20
20
  primary_key=None,
21
21
  ):
22
- self._slice_key = slice_key
23
- self._slice_values_to_records = slice_values_to_records_or_exception
22
+ self._slices_and_records_or_exception = slices_and_records_or_exception
24
23
  self._name = name
25
24
  self._json_schema = json_schema
26
25
  self._primary_key = primary_key
@@ -32,19 +31,12 @@ class _MockStream(Stream):
32
31
  stream_slice: Optional[Mapping[str, Any]] = None,
33
32
  stream_state: Optional[Mapping[str, Any]] = None,
34
33
  ) -> Iterable[StreamData]:
35
- for record_or_exception in self._get_record_or_exception_iterable(stream_slice):
36
- if isinstance(record_or_exception, Exception):
37
- raise record_or_exception
38
- else:
39
- yield record_or_exception
40
-
41
- def _get_record_or_exception_iterable(
42
- self, stream_slice: Optional[Mapping[str, Any]] = None
43
- ) -> Iterable[Union[Mapping[str, Any], Exception]]:
44
- if stream_slice is None:
45
- return self._slice_values_to_records[None]
46
- else:
47
- return self._slice_values_to_records[stream_slice[self._slice_key]]
34
+ for _slice, records_or_exception in self._slices_and_records_or_exception:
35
+ if stream_slice == _slice:
36
+ for item in records_or_exception:
37
+ if isinstance(item, Exception):
38
+ raise item
39
+ yield item
48
40
 
49
41
  @property
50
42
  def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -60,16 +52,16 @@ class _MockStream(Stream):
60
52
  def stream_slices(
61
53
  self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
62
54
  ) -> Iterable[Optional[Mapping[str, Any]]]:
63
- if self._slice_key:
64
- for slice_value in self._slice_values_to_records.keys():
65
- yield {self._slice_key: slice_value}
55
+ if self._slices_and_records_or_exception:
56
+ yield from [_slice for _slice, records_or_exception in self._slices_and_records_or_exception]
66
57
  else:
67
58
  yield None
68
59
 
69
60
 
70
61
  _stream1 = _MockStream(
71
- None,
72
- {None: [{"id": "1"}, {"id": "2"}]},
62
+ [
63
+ (None, [{"id": "1"}, {"id": "2"}]),
64
+ ],
73
65
  "stream1",
74
66
  json_schema={
75
67
  "type": "object",
@@ -80,8 +72,9 @@ _stream1 = _MockStream(
80
72
  )
81
73
 
82
74
  _stream_raising_exception = _MockStream(
83
- None,
84
- {None: [{"id": "1"}, ValueError("test exception")]},
75
+ [
76
+ (None, [{"id": "1"}, ValueError("test exception")]),
77
+ ],
85
78
  "stream1",
86
79
  json_schema={
87
80
  "type": "object",
@@ -92,8 +85,9 @@ _stream_raising_exception = _MockStream(
92
85
  )
93
86
 
94
87
  _stream_with_primary_key = _MockStream(
95
- None,
96
- {None: [{"id": "1"}, {"id": "2"}]},
88
+ [
89
+ (None, [{"id": "1"}, {"id": "2"}]),
90
+ ],
97
91
  "stream1",
98
92
  json_schema={
99
93
  "type": "object",
@@ -105,8 +99,9 @@ _stream_with_primary_key = _MockStream(
105
99
  )
106
100
 
107
101
  _stream2 = _MockStream(
108
- None,
109
- {None: [{"id": "A"}, {"id": "B"}]},
102
+ [
103
+ (None, [{"id": "A"}, {"id": "B"}]),
104
+ ],
110
105
  "stream2",
111
106
  json_schema={
112
107
  "type": "object",
@@ -117,8 +112,9 @@ _stream2 = _MockStream(
117
112
  )
118
113
 
119
114
  _stream_with_single_slice = _MockStream(
120
- "slice_key",
121
- {"s1": [{"id": "1"}, {"id": "2"}]},
115
+ [
116
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
117
+ ],
122
118
  "stream1",
123
119
  json_schema={
124
120
  "type": "object",
@@ -129,11 +125,10 @@ _stream_with_single_slice = _MockStream(
129
125
  )
130
126
 
131
127
  _stream_with_multiple_slices = _MockStream(
132
- "slice_key",
133
- {
134
- "s1": [{"id": "1"}, {"id": "2"}],
135
- "s2": [{"id": "3"}, {"id": "4"}],
136
- },
128
+ [
129
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
130
+ ({"slice_key": "s2"}, [{"id": "3"}, {"id": "4"}]),
131
+ ],
137
132
  "stream1",
138
133
  json_schema={
139
134
  "type": "object",
@@ -384,3 +379,125 @@ test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two
384
379
  )
385
380
  .build()
386
381
  )
382
+
383
+
384
+ test_incremental_stream_with_slice_boundaries = (
385
+ TestScenarioBuilder()
386
+ .set_name("test_incremental_stream_with_slice_boundaries")
387
+ .set_config({})
388
+ .set_source_builder(
389
+ StreamFacadeSourceBuilder()
390
+ .set_streams(
391
+ [
392
+ _MockStream(
393
+ [
394
+ ({"from": 0, "to": 1}, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 1}]),
395
+ ({"from": 1, "to": 2}, [{"id": "3", "cursor_field": 2}, {"id": "4", "cursor_field": 3}]),
396
+ ],
397
+ "stream1",
398
+ json_schema={
399
+ "type": "object",
400
+ "properties": {
401
+ "id": {"type": ["null", "string"]},
402
+ },
403
+ },
404
+ )
405
+ ]
406
+ )
407
+ .set_incremental(CursorField("cursor_field"), ("from", "to"))
408
+ )
409
+ .set_expected_records(
410
+ [
411
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
412
+ {"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
413
+ {"stream1": {"slices": [{"start": 0, "end": 1}]}},
414
+ {"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
415
+ {"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
416
+ {"stream1": {"slices": [{"start": 0, "end": 1}, {"start": 1, "end": 2}]}},
417
+ ]
418
+ )
419
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
420
+ .set_incremental_scenario_config(
421
+ IncrementalScenarioConfig(
422
+ input_state=[],
423
+ )
424
+ )
425
+ .build()
426
+ )
427
+
428
+
429
+ _NO_SLICE_BOUNDARIES = None
430
+ test_incremental_stream_without_slice_boundaries = (
431
+ TestScenarioBuilder()
432
+ .set_name("test_incremental_stream_without_slice_boundaries")
433
+ .set_config({})
434
+ .set_source_builder(
435
+ StreamFacadeSourceBuilder()
436
+ .set_streams(
437
+ [
438
+ _MockStream(
439
+ [
440
+ (None, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 3}]),
441
+ ],
442
+ "stream1",
443
+ json_schema={
444
+ "type": "object",
445
+ "properties": {
446
+ "id": {"type": ["null", "string"]},
447
+ },
448
+ },
449
+ )
450
+ ]
451
+ )
452
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
453
+ )
454
+ .set_expected_records(
455
+ [
456
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
457
+ {"data": {"id": "2", "cursor_field": 3}, "stream": "stream1"},
458
+ {"stream1": {"slices": [{"start": 0, "end": 3}]}},
459
+ ]
460
+ )
461
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
462
+ .set_incremental_scenario_config(
463
+ IncrementalScenarioConfig(
464
+ input_state=[],
465
+ )
466
+ )
467
+ .build()
468
+ )
469
+
470
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries = (
471
+ TestScenarioBuilder()
472
+ .set_name("test_incremental_stream_with_many_slices_byt_without_slice_boundaries")
473
+ .set_config({})
474
+ .set_source_builder(
475
+ StreamFacadeSourceBuilder()
476
+ .set_streams(
477
+ [
478
+ _MockStream(
479
+ [
480
+ ({"parent_id": 1}, [{"id": "1", "cursor_field": 0}]),
481
+ ({"parent_id": 309}, [{"id": "3", "cursor_field": 0}]),
482
+ ],
483
+ "stream1",
484
+ json_schema={
485
+ "type": "object",
486
+ "properties": {
487
+ "id": {"type": ["null", "string"]},
488
+ },
489
+ },
490
+ )
491
+ ]
492
+ )
493
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
494
+ )
495
+ .set_expected_read_error(ValueError, "test exception")
496
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
497
+ .set_incremental_scenario_config(
498
+ IncrementalScenarioConfig(
499
+ input_state=[],
500
+ )
501
+ )
502
+ .build()
503
+ )