airbyte-cdk 0.52.7__py3-none-any.whl → 0.52.8__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27)
  1. airbyte_cdk/destinations/vector_db_based/config.py +1 -0
  2. airbyte_cdk/sources/abstract_source.py +12 -61
  3. airbyte_cdk/sources/message/repository.py +0 -6
  4. airbyte_cdk/sources/source.py +14 -13
  5. airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
  6. airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
  7. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
  8. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
  9. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
  10. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
  11. airbyte_cdk/sources/streams/core.py +71 -1
  12. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +27 -25
  14. unit_tests/sources/message/test_repository.py +7 -20
  15. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
  16. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
  17. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
  18. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
  19. unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
  20. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
  21. unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
  22. unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
  23. unit_tests/sources/streams/test_stream_read.py +3 -1
  24. unit_tests/sources/test_abstract_source.py +12 -9
  25. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
  26. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +0 -0
  27. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -12,9 +12,10 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
12
12
 
13
13
  import airbyte_cdk.sources.utils.casing as casing
14
14
  from airbyte_cdk.models import AirbyteMessage, AirbyteStream, SyncMode
15
+ from airbyte_cdk.models import Type as MessageType
15
16
 
16
17
  # list of all possible HTTP methods which can be used for sending of request bodies
17
- from airbyte_cdk.sources.utils.schema_helpers import ResourceSchemaLoader
18
+ from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
18
19
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
19
20
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
20
21
  from deprecated.classic import deprecated
@@ -123,6 +124,57 @@ class Stream(ABC):
123
124
  cursor_field=cursor_field,
124
125
  )
125
126
 
127
+ def read_incremental( # type: ignore # ignoring typing for ConnectorStateManager because of circular dependencies
128
+ self,
129
+ cursor_field: Optional[List[str]],
130
+ logger: logging.Logger,
131
+ slice_logger: SliceLogger,
132
+ stream_state: MutableMapping[str, Any],
133
+ state_manager,
134
+ per_stream_state_enabled: bool,
135
+ internal_config: InternalConfig,
136
+ ) -> Iterable[StreamData]:
137
+ slices = self.stream_slices(
138
+ cursor_field=cursor_field,
139
+ sync_mode=SyncMode.incremental,
140
+ stream_state=stream_state,
141
+ )
142
+ logger.debug(f"Processing stream slices for {self.name} (sync_mode: incremental)", extra={"stream_slices": slices})
143
+
144
+ has_slices = False
145
+ record_counter = 0
146
+ for _slice in slices:
147
+ has_slices = True
148
+ if slice_logger.should_log_slice_message(logger):
149
+ yield slice_logger.create_slice_log_message(_slice)
150
+ records = self.read_records(
151
+ sync_mode=SyncMode.incremental,
152
+ stream_slice=_slice,
153
+ stream_state=stream_state,
154
+ cursor_field=cursor_field or None,
155
+ )
156
+ for record_data_or_message in records:
157
+ yield record_data_or_message
158
+ if isinstance(record_data_or_message, Mapping) or (
159
+ hasattr(record_data_or_message, "type") and record_data_or_message.type == MessageType.RECORD
160
+ ):
161
+ record_data = record_data_or_message if isinstance(record_data_or_message, Mapping) else record_data_or_message.record
162
+ stream_state = self.get_updated_state(stream_state, record_data)
163
+ checkpoint_interval = self.state_checkpoint_interval
164
+ record_counter += 1
165
+ if checkpoint_interval and record_counter % checkpoint_interval == 0:
166
+ yield self._checkpoint_state(stream_state, state_manager, per_stream_state_enabled)
167
+
168
+ if internal_config.is_limit_reached(record_counter):
169
+ break
170
+
171
+ yield self._checkpoint_state(stream_state, state_manager, per_stream_state_enabled)
172
+
173
+ if not has_slices:
174
+ # Safety net to ensure we always emit at least one state message even if there are no slices
175
+ checkpoint = self._checkpoint_state(stream_state, state_manager, per_stream_state_enabled)
176
+ yield checkpoint
177
+
126
178
  @abstractmethod
127
179
  def read_records(
128
180
  self,
@@ -304,3 +356,21 @@ class Stream(ABC):
304
356
  return wrapped_keys
305
357
  else:
306
358
  raise ValueError(f"Element must be either list or str. Got: {type(keys)}")
359
+
360
+ def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateManager because of circular dependencies
361
+ self,
362
+ stream_state: Mapping[str, Any],
363
+ state_manager,
364
+ per_stream_state_enabled: bool,
365
+ ) -> AirbyteMessage:
366
+ # First attempt to retrieve the current state using the stream's state property. We receive an AttributeError if the state
367
+ # property is not implemented by the stream instance and as a fallback, use the stream_state retrieved from the stream
368
+ # instance's deprecated get_updated_state() method.
369
+ try:
370
+ state_manager.update_state_for_stream(
371
+ self.name, self.namespace, self.state # type: ignore # we know the field might not exist...
372
+ )
373
+
374
+ except AttributeError:
375
+ state_manager.update_state_for_stream(self.name, self.namespace, stream_state)
376
+ return state_manager.create_state_message(self.name, self.namespace, send_per_stream_state=per_stream_state_enabled)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.52.7
3
+ Version: 0.52.8
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -13,7 +13,7 @@ airbyte_cdk/connector_builder/models.py,sha256=U2LrL1syxZ0gQ3LgnwVj9ozL6uGH5f9bi
13
13
  airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
14
14
  airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
15
15
  airbyte_cdk/destinations/vector_db_based/__init__.py,sha256=eAkzwTjBbXBhJ5GfPO5I53Zgpv5xQFLRQS8n4nuyPt0,1006
16
- airbyte_cdk/destinations/vector_db_based/config.py,sha256=HXzXnItahHfpyzQzwcNrypW8mmzRsJTNd4v1GB-z6TU,9543
16
+ airbyte_cdk/destinations/vector_db_based/config.py,sha256=tMp8blgdrI4t7a9Ri9Vydk0TOcRqLTHHUjVlXtc0Wa4,9562
17
17
  airbyte_cdk/destinations/vector_db_based/document_processor.py,sha256=ldrlmCT4gFHc_A5B_um4OteXg1OR0LGyDmswO1316tA,8649
18
18
  airbyte_cdk/destinations/vector_db_based/embedder.py,sha256=davAE4UtrpWDjbV74tck5zvKksxizvSdF9X51WFMbW4,10913
19
19
  airbyte_cdk/destinations/vector_db_based/indexer.py,sha256=58Uf34yIe0QHbnpbkS7rH2sqL7eLzwWUjx7X4yciyeA,3165
@@ -24,11 +24,11 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
24
24
  airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
25
25
  airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
26
26
  airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
27
- airbyte_cdk/sources/abstract_source.py,sha256=0YvschwQsz3EgVaMw-WlV3WcM12_VF72MKRXaCSxaz4,16428
27
+ airbyte_cdk/sources/abstract_source.py,sha256=iw4Y5tZtyXsY0XDVPNiQiAZBp6O_zyqRMsOEhu7I7lo,13375
28
28
  airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
29
29
  airbyte_cdk/sources/connector_state_manager.py,sha256=wsmUgII398MazCTKxwLBLzeiU6Z-tMTrKX882EEy-YE,10904
30
30
  airbyte_cdk/sources/http_logger.py,sha256=v0kkpDtA0GUOgj6_3AayrYaBrSHBqG4t3MGbrtxaNmU,1437
31
- airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
31
+ airbyte_cdk/sources/source.py,sha256=dk50z8Roc28MJ8FxWe652B-GwItO__bTZqFm7WOtHnw,4412
32
32
  airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
33
33
  airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
34
34
  airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=LtLvEpzKo86RzMO6n20-z4ECW6P0Yoi26HXRCSLP9K0,85049
@@ -181,24 +181,25 @@ airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8
181
181
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=i-FPeK8lwCzX34GCcmvL5Yvdh8-uu7FeCVYDoFbD7IY,1920
182
182
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=kuJRKgDYOGXRk0V0I8BpFxg0hGv7SfV_nBpmmn45F88,6815
183
183
  airbyte_cdk/sources/message/__init__.py,sha256=14ZSLah9uyI_CyK7_jIyq521vlgKAdihe6Ciw6-jLgU,372
184
- airbyte_cdk/sources/message/repository.py,sha256=kflbIkUwCWXMKpe6566TD_HRjRqEZKQ0h2RpxzjWLJk,4994
184
+ airbyte_cdk/sources/message/repository.py,sha256=tQOmtWxrAp1CMiOKi5SdIEWzcmgnCUYd-xL3fcupUT4,4583
185
185
  airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
186
186
  airbyte_cdk/sources/singer/singer_helpers.py,sha256=q1LmgjFxSnN-dobMy7nikUwcK-9FvW5QQfgTqiclbAE,15649
187
187
  airbyte_cdk/sources/singer/source.py,sha256=3YY8UTOXmctvMVUnYmIegmL3_IxF55iGP_bc_s2MZdY,8530
188
188
  airbyte_cdk/sources/streams/__init__.py,sha256=IztrWN5IU_N5GCKDyRSEuoWdZohFTcgIbAIkaCFkr_Q,176
189
189
  airbyte_cdk/sources/streams/availability_strategy.py,sha256=7BM0qLvXS0QrlKvnVkBEw4Cw8i7PCENCBLcIAcuD3nY,1007
190
- airbyte_cdk/sources/streams/core.py,sha256=3tnL0JBtzasuGER_kTY7CR7soSgChDvjCuPuw241HOM,12442
190
+ airbyte_cdk/sources/streams/core.py,sha256=bIuQV7Zs9JpIyNDcfPCbyzv-BWDr_2ictK7s5AihLZQ,16025
191
191
  airbyte_cdk/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
192
192
  airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=GCcRvUixoDOkNPy0vK37xdGxYaOfZXxVH6InzndduaE,3525
193
- airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=A3xoPEBTG8Y1GMrts1ehhxt6jlbECr5UoNzko6uGFis,14432
193
+ airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=yYpmVHwRkanyz1Pfm1dbZt_Q93pGnY8cmVKDLwiFTBM,17325
194
194
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=8xDRpfktnARBbRi_RwznvKuoGrpPF2b6tQyloMwogkM,2013
195
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=z9xuKGHadIOvVA67_SzlmfkQgkaudO2Te2k8Gi9OibY,6374
195
196
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=-WETGIY5_QFmVeDFiqm4WhRJ_nNCkfcDwOQqx6cSqrI,365
196
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=DFDgZD_SKOcRwm634BOp2wMW37S0S1Z3JcNvJFXnl38,1682
197
+ airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=Xg0yeH2-aYTOvSXZrDbzGiacNr4_ARBpSjxkzdKvGoU,1602
197
198
  airbyte_cdk/sources/streams/concurrent/partition_reader.py,sha256=H8sGVVGx6uKMSUehRaqmVbE19DE3cx3NivQ4sFj8wbk,1303
198
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py,sha256=Qk4zs-Zho4wBqCvVtK6stw8Y79dKFmKA8FpuEkQDqyg,9811
199
+ airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py,sha256=7DRmqrnWVWwout-MR6hBb1bIP5h5H9tU8pzXkYm3EKA,10024
199
200
  airbyte_cdk/sources/streams/concurrent/partitions/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
200
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py,sha256=r0vl7IgoOnsvWNk3_-VtDWjLYm_BPQ7tmpYmXgXLuGA,977
201
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py,sha256=6ZLDxm2LBKmC5HD_6wpgFAEH12CkOr1Mn3azIGwYfdM,537
201
+ airbyte_cdk/sources/streams/concurrent/partitions/partition.py,sha256=tjXF8lZMvyfZaCYCHr5aTPwbVstmRjYZDwYAvLDY-ds,1312
202
+ airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py,sha256=_ymkkBr71_qt1fW0_MUqw96OfNBkeJngXQ09yolEDHw,441
202
203
  airbyte_cdk/sources/streams/concurrent/partitions/record.py,sha256=c87pzwl18pq1_3XLoKDXH_WwrskjbBnTGkxrF4uU5-A,469
203
204
  airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=uc3aBg2kbp3mZry3RtmAwtFExKG2oQw2qG12tZWY514,849
204
205
  airbyte_cdk/sources/streams/http/__init__.py,sha256=cTP2d7Wf0hYXaN20U0dtxKa1pFZ9rI-lBbkQ0UM1apQ,261
@@ -250,7 +251,7 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
250
251
  unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
251
252
  unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
252
253
  unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
253
- unit_tests/sources/test_abstract_source.py,sha256=6oo0lSfy4oKsagj-8KXGDQB8fuMvN1j9n7bXlAGCIsE,47759
254
+ unit_tests/sources/test_abstract_source.py,sha256=V7zSpOk-jqfOz8FtnImAo_zDe-Q2TjPqD_l_T0QaiDw,48179
254
255
  unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
255
256
  unit_tests/sources/test_connector_state_manager.py,sha256=ynFxA63Cxe6t-wMMh9C6ByTlMAuk8W7H2FikDhnUEQ0,24264
256
257
  unit_tests/sources/test_http_logger.py,sha256=VT6DqgspI3DcRnoBQkkQX0z4dF_AOiYZ5P_zxmMW8oU,9004
@@ -370,22 +371,23 @@ unit_tests/sources/file_based/stream/test_default_file_based_stream.py,sha256=An
370
371
  unit_tests/sources/fixtures/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
371
372
  unit_tests/sources/fixtures/source_test_fixture.py,sha256=dvpISgio2sOp-U3bXudH_49vY4c68sO_PMs1JZTMaj0,5502
372
373
  unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
373
- unit_tests/sources/message/test_repository.py,sha256=zORo6lLj4L6QPpvUX4JW9XyiLSJhouFKmIzEX4Brm4M,7197
374
+ unit_tests/sources/message/test_repository.py,sha256=oiScwg4cAdnYDl7PPN1nZniDGpALz9JTPaRAuU2HD7g,6916
374
375
  unit_tests/sources/streams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
375
376
  unit_tests/sources/streams/test_availability_strategy.py,sha256=vJrSEk9NwRghu0YsSNoMYHKWzA9UFemwyClpke8Mk2s,2315
376
- unit_tests/sources/streams/test_stream_read.py,sha256=yzVfbVdHR7jc-_4-BfA8ySNogdYqapnUayBWuPjg1j0,6768
377
+ unit_tests/sources/streams/test_stream_read.py,sha256=so1M-vf8HxSGq-MVrMymtCvK0abdaT_X9AfV9i0uVm0,6879
377
378
  unit_tests/sources/streams/test_streams_core.py,sha256=YOC7XqWFJ13Z4YuO9Nh4AR4AwpJ-s111vqPplFfpxk4,5059
378
379
  unit_tests/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
379
- unit_tests/sources/streams/concurrent/test_adapters.py,sha256=gTfNEDV9ZSiH9vzyuXJRQN2fvBdwZECVyczw_B8tvPc,13027
380
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py,sha256=HHaSM8PoqW-46sIjW-v3NE9iQyIKgjwcQ9nscjwsiko,1260
380
+ unit_tests/sources/streams/concurrent/test_adapters.py,sha256=v8_tv2GCUk73DyoiPEPFDBrTRCsXysR-Cw-DXkQnPn4,14743
381
+ unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py,sha256=6ai_6AeRuiUFB0p5TcFGHMG2eiGFbGrmXhI41Oe5XYQ,1321
382
+ unit_tests/sources/streams/concurrent/test_cursor.py,sha256=vtKTMR4DGDKftaSny6ioKohX9bnT-mQsg_YGwkvlvwE,5107
381
383
  unit_tests/sources/streams/concurrent/test_partition_reader.py,sha256=eM5dzfmLKm9Lj-BfQUjAZRhCZzfvhk7AkKpcHGcoPfg,931
382
- unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py,sha256=XM8KKOLp-yZcU2rwd8qUXKGXXxbmAaIl_P6zzxwprG0,10048
384
+ unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py,sha256=TRWhtcJ56kIgMPRST46YCERC_DVRPkAbAzVVDZgX2bI,10285
383
385
  unit_tests/sources/streams/concurrent/scenarios/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
384
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=8Wb4a0XglscnmeCyDHPGTwlX3y7mQU57EAk0q7AIoOg,2442
385
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=_1lu7H8EdE7edC7XunhKMsGx6V9KzBCkyEF0B-hRSdI,11851
386
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=C6rADQ3DwfwyGYP463hd46aBlJ7C3TQ8Bl6CyAvGSVc,2601
386
+ unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=XLPC5Wqr-aBIc1tPahLKBnPHbHFw5q9rWkWCewmMeN8,4080
387
+ unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=XqAvndFw4i3s0Kky1AFy_asA6VDUVSj-se6rEqwf94s,15820
388
+ unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=yIIyEHn2jLkgryHTq9K5TSRTMit0ynIhjhMj0hKFItM,2961
387
389
  unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py,sha256=kiZ6VvQywg073FtrpP9AD3yIwSKbalVyfOM2Ksu3sZI,13554
388
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py,sha256=mnI799feDVAkDMjG-Qru8irUQECh6P2tMA9Kk-DHrW0,4732
390
+ unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py,sha256=G4Em5zfAd9ExzDaD-86nabxWHj9wn5HT3Mfz37UNiME,5310
389
391
  unit_tests/sources/streams/http/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
390
392
  unit_tests/sources/streams/http/test_availability_strategy.py,sha256=kuQJ5FIc4lffpHmEUVzvoN1QXQzvz8WEkFvzHItiipg,6063
391
393
  unit_tests/sources/streams/http/test_http.py,sha256=Zo5B_PPuyoCd6Cv3q8ISCxUQCwM3rFqMM-CS6TqMvRs,24746
@@ -401,8 +403,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
401
403
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
402
404
  unit_tests/utils/test_stream_status_utils.py,sha256=N2TxwKge45RHUKFlPcP2o5jXYjJPKMKiu6Fm2_leZYY,3388
403
405
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
404
- airbyte_cdk-0.52.7.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
405
- airbyte_cdk-0.52.7.dist-info/METADATA,sha256=t9RNmgqP5HbmlaCqOyOXT2-l7UGke73oMmP_4QUmWa0,11945
406
- airbyte_cdk-0.52.7.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
407
- airbyte_cdk-0.52.7.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
408
- airbyte_cdk-0.52.7.dist-info/RECORD,,
406
+ airbyte_cdk-0.52.8.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
407
+ airbyte_cdk-0.52.8.dist-info/METADATA,sha256=dLyy4YLIXG8SQk9Z07o05xWLhnRBCIaqC52tFodjflg,11945
408
+ airbyte_cdk-0.52.8.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
409
+ airbyte_cdk-0.52.8.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
410
+ airbyte_cdk-0.52.8.dist-info/RECORD,,
@@ -5,15 +5,7 @@
5
5
  from unittest.mock import Mock
6
6
 
7
7
  import pytest
8
- from airbyte_cdk.models import (
9
- AirbyteControlConnectorConfigMessage,
10
- AirbyteControlMessage,
11
- AirbyteMessage,
12
- AirbyteStateMessage,
13
- Level,
14
- OrchestratorType,
15
- Type,
16
- )
8
+ from airbyte_cdk.models import AirbyteControlConnectorConfigMessage, AirbyteControlMessage, AirbyteMessage, Level, OrchestratorType, Type
17
9
  from airbyte_cdk.sources.message import (
18
10
  InMemoryMessageRepository,
19
11
  LogAppenderMessageRepositoryDecorator,
@@ -74,14 +66,9 @@ class TestInMemoryMessageRepository:
74
66
  second_message_generator = repo.consume_queue()
75
67
  assert list(second_message_generator) == [second_message]
76
68
 
77
- def test_given_message_is_not_control_nor_log_message_when_emit_message_then_raise_error(self):
78
- repo = InMemoryMessageRepository()
79
- with pytest.raises(ValueError):
80
- repo.emit_message(AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data={"state": "state value"})))
81
-
82
69
  def test_given_log_level_is_severe_enough_when_log_message_then_allow_message_to_be_consumed(self):
83
70
  repo = InMemoryMessageRepository(Level.DEBUG)
84
- repo.log_message(Level.INFO, lambda: "this is a log message")
71
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
85
72
  assert list(repo.consume_queue())
86
73
 
87
74
  def test_given_log_level_is_severe_enough_when_log_message_then_filter_secrets(self, mocker):
@@ -89,18 +76,18 @@ class TestInMemoryMessageRepository:
89
76
  mocker.patch("airbyte_cdk.sources.message.repository.filter_secrets", return_value=filtered_message)
90
77
  repo = InMemoryMessageRepository(Level.DEBUG)
91
78
 
92
- repo.log_message(Level.INFO, lambda: "this is a log message")
79
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
93
80
 
94
81
  assert list(repo.consume_queue())[0].log.message == filtered_message
95
82
 
96
83
  def test_given_log_level_not_severe_enough_when_log_message_then_do_not_allow_message_to_be_consumed(self):
97
84
  repo = InMemoryMessageRepository(Level.ERROR)
98
- repo.log_message(Level.INFO, lambda: "this is a log message")
85
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
99
86
  assert not list(repo.consume_queue())
100
87
 
101
88
  def test_given_unknown_log_level_as_threshold_when_log_message_then_allow_message_to_be_consumed(self):
102
89
  repo = InMemoryMessageRepository(UNKNOWN_LEVEL)
103
- repo.log_message(Level.DEBUG, lambda: "this is a log message")
90
+ repo.log_message(Level.DEBUG, lambda: {"message": "this is a log message"})
104
91
  assert list(repo.consume_queue())
105
92
 
106
93
  def test_given_unknown_log_level_for_log_when_log_message_then_raise_error(self):
@@ -109,14 +96,14 @@ class TestInMemoryMessageRepository:
109
96
  """
110
97
  repo = InMemoryMessageRepository(Level.ERROR)
111
98
  with pytest.raises(ValidationError):
112
- repo.log_message(UNKNOWN_LEVEL, lambda: "this is a log message")
99
+ repo.log_message(UNKNOWN_LEVEL, lambda: {"message": "this is a log message"})
113
100
 
114
101
 
115
102
  class TestNoopMessageRepository:
116
103
  def test_given_message_emitted_when_consume_queue_then_return_empty(self):
117
104
  repo = NoopMessageRepository()
118
105
  repo.emit_message(AirbyteMessage(type=Type.CONTROL, control=A_CONTROL))
119
- repo.log_message(Level.INFO, lambda: "this is a log message")
106
+ repo.log_message(Level.INFO, lambda: {"message": "this is a log message"})
120
107
 
121
108
  assert not list(repo.consume_queue())
122
109
 
@@ -6,27 +6,61 @@ from typing import Any, List, Mapping, Optional, Tuple, Union
6
6
 
7
7
  from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
8
8
  from airbyte_cdk.sources import AbstractSource
9
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
9
10
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
10
11
  from airbyte_cdk.sources.streams import Stream
11
12
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
13
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
12
14
  from airbyte_protocol.models import ConfiguredAirbyteStream
13
15
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
14
16
 
17
+ _NO_STATE = None
18
+
15
19
 
16
20
  class StreamFacadeSource(AbstractSource):
17
- def __init__(self, streams: List[Stream], max_workers: int):
21
+ def __init__(
22
+ self,
23
+ streams: List[Stream],
24
+ max_workers: int,
25
+ cursor_field: Optional[CursorField] = None,
26
+ cursor_boundaries: Optional[Tuple[str, str]] = None,
27
+ ):
18
28
  self._streams = streams
19
29
  self._max_workers = max_workers
30
+ self._message_repository = InMemoryMessageRepository()
31
+ self._cursor_field = cursor_field
32
+ self._cursor_boundaries = cursor_boundaries
20
33
 
21
34
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
22
35
  return True, None
23
36
 
24
37
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
25
- return [StreamFacade.create_from_stream(stream, self, stream.logger, self._max_workers) for stream in self._streams]
38
+ state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=_NO_STATE)
39
+ return [
40
+ StreamFacade.create_from_stream(
41
+ stream,
42
+ self,
43
+ stream.logger,
44
+ self._max_workers,
45
+ _NO_STATE,
46
+ ConcurrentCursor(
47
+ stream.name,
48
+ stream.namespace,
49
+ _NO_STATE,
50
+ self.message_repository, # type: ignore # for this source specifically, we always return `InMemoryMessageRepository`
51
+ state_manager,
52
+ self._cursor_field,
53
+ self._cursor_boundaries,
54
+ )
55
+ if self._cursor_field
56
+ else NoopCursor(),
57
+ )
58
+ for stream in self._streams
59
+ ]
26
60
 
27
61
  @property
28
62
  def message_repository(self) -> Union[None, MessageRepository]:
29
- return InMemoryMessageRepository()
63
+ return self._message_repository
30
64
 
31
65
  def spec(self, logger: logging.Logger) -> ConnectorSpecification:
32
66
  return ConnectorSpecification(connectionSpecification={})
@@ -49,14 +83,21 @@ class StreamFacadeSourceBuilder(SourceBuilder[StreamFacadeSource]):
49
83
  self._source = None
50
84
  self._streams = []
51
85
  self._max_workers = 1
86
+ self._cursor_field = None
87
+ self._cursor_boundaries = None
52
88
 
53
89
  def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder":
54
90
  self._streams = streams
55
91
  return self
56
92
 
57
- def set_max_workers(self, max_workers: int):
93
+ def set_max_workers(self, max_workers: int) -> "StreamFacadeSourceBuilder":
58
94
  self._max_workers = max_workers
59
95
  return self
60
96
 
97
+ def set_incremental(self, cursor_field: CursorField, cursor_boundaries: Optional[Tuple[str, str]]) -> "StreamFacadeSourceBuilder":
98
+ self._cursor_field = cursor_field
99
+ self._cursor_boundaries = cursor_boundaries
100
+ return self
101
+
61
102
  def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> StreamFacadeSource:
62
- return StreamFacadeSource(self._streams, self._max_workers)
103
+ return StreamFacadeSource(self._streams, self._max_workers, self._cursor_field, self._cursor_boundaries)
@@ -1,26 +1,25 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
- from typing import Any, Iterable, List, Mapping, Optional, Union
4
+ from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
5
5
 
6
6
  from airbyte_cdk.models import SyncMode
7
7
  from airbyte_cdk.sources.streams import Stream
8
+ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
8
9
  from airbyte_cdk.sources.streams.core import StreamData
9
- from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
10
+ from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
10
11
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder
11
12
 
12
13
 
13
14
  class _MockStream(Stream):
14
15
  def __init__(
15
16
  self,
16
- slice_key,
17
- slice_values_to_records_or_exception: Mapping[Optional[str], List[Union[Mapping[str, Any], Exception]]],
17
+ slices_and_records_or_exception: Iterable[Tuple[Optional[Mapping[str, Any]], Iterable[Union[Exception, Mapping[str, Any]]]]],
18
18
  name,
19
19
  json_schema,
20
20
  primary_key=None,
21
21
  ):
22
- self._slice_key = slice_key
23
- self._slice_values_to_records = slice_values_to_records_or_exception
22
+ self._slices_and_records_or_exception = slices_and_records_or_exception
24
23
  self._name = name
25
24
  self._json_schema = json_schema
26
25
  self._primary_key = primary_key
@@ -32,19 +31,12 @@ class _MockStream(Stream):
32
31
  stream_slice: Optional[Mapping[str, Any]] = None,
33
32
  stream_state: Optional[Mapping[str, Any]] = None,
34
33
  ) -> Iterable[StreamData]:
35
- for record_or_exception in self._get_record_or_exception_iterable(stream_slice):
36
- if isinstance(record_or_exception, Exception):
37
- raise record_or_exception
38
- else:
39
- yield record_or_exception
40
-
41
- def _get_record_or_exception_iterable(
42
- self, stream_slice: Optional[Mapping[str, Any]] = None
43
- ) -> Iterable[Union[Mapping[str, Any], Exception]]:
44
- if stream_slice is None:
45
- return self._slice_values_to_records[None]
46
- else:
47
- return self._slice_values_to_records[stream_slice[self._slice_key]]
34
+ for _slice, records_or_exception in self._slices_and_records_or_exception:
35
+ if stream_slice == _slice:
36
+ for item in records_or_exception:
37
+ if isinstance(item, Exception):
38
+ raise item
39
+ yield item
48
40
 
49
41
  @property
50
42
  def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -60,16 +52,16 @@ class _MockStream(Stream):
60
52
  def stream_slices(
61
53
  self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
62
54
  ) -> Iterable[Optional[Mapping[str, Any]]]:
63
- if self._slice_key:
64
- for slice_value in self._slice_values_to_records.keys():
65
- yield {self._slice_key: slice_value}
55
+ if self._slices_and_records_or_exception:
56
+ yield from [_slice for _slice, records_or_exception in self._slices_and_records_or_exception]
66
57
  else:
67
58
  yield None
68
59
 
69
60
 
70
61
  _stream1 = _MockStream(
71
- None,
72
- {None: [{"id": "1"}, {"id": "2"}]},
62
+ [
63
+ (None, [{"id": "1"}, {"id": "2"}]),
64
+ ],
73
65
  "stream1",
74
66
  json_schema={
75
67
  "type": "object",
@@ -80,8 +72,9 @@ _stream1 = _MockStream(
80
72
  )
81
73
 
82
74
  _stream_raising_exception = _MockStream(
83
- None,
84
- {None: [{"id": "1"}, ValueError("test exception")]},
75
+ [
76
+ (None, [{"id": "1"}, ValueError("test exception")]),
77
+ ],
85
78
  "stream1",
86
79
  json_schema={
87
80
  "type": "object",
@@ -92,8 +85,9 @@ _stream_raising_exception = _MockStream(
92
85
  )
93
86
 
94
87
  _stream_with_primary_key = _MockStream(
95
- None,
96
- {None: [{"id": "1"}, {"id": "2"}]},
88
+ [
89
+ (None, [{"id": "1"}, {"id": "2"}]),
90
+ ],
97
91
  "stream1",
98
92
  json_schema={
99
93
  "type": "object",
@@ -105,8 +99,9 @@ _stream_with_primary_key = _MockStream(
105
99
  )
106
100
 
107
101
  _stream2 = _MockStream(
108
- None,
109
- {None: [{"id": "A"}, {"id": "B"}]},
102
+ [
103
+ (None, [{"id": "A"}, {"id": "B"}]),
104
+ ],
110
105
  "stream2",
111
106
  json_schema={
112
107
  "type": "object",
@@ -117,8 +112,9 @@ _stream2 = _MockStream(
117
112
  )
118
113
 
119
114
  _stream_with_single_slice = _MockStream(
120
- "slice_key",
121
- {"s1": [{"id": "1"}, {"id": "2"}]},
115
+ [
116
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
117
+ ],
122
118
  "stream1",
123
119
  json_schema={
124
120
  "type": "object",
@@ -129,11 +125,10 @@ _stream_with_single_slice = _MockStream(
129
125
  )
130
126
 
131
127
  _stream_with_multiple_slices = _MockStream(
132
- "slice_key",
133
- {
134
- "s1": [{"id": "1"}, {"id": "2"}],
135
- "s2": [{"id": "3"}, {"id": "4"}],
136
- },
128
+ [
129
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
130
+ ({"slice_key": "s2"}, [{"id": "3"}, {"id": "4"}]),
131
+ ],
137
132
  "stream1",
138
133
  json_schema={
139
134
  "type": "object",
@@ -384,3 +379,125 @@ test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two
384
379
  )
385
380
  .build()
386
381
  )
382
+
383
+
384
+ test_incremental_stream_with_slice_boundaries = (
385
+ TestScenarioBuilder()
386
+ .set_name("test_incremental_stream_with_slice_boundaries")
387
+ .set_config({})
388
+ .set_source_builder(
389
+ StreamFacadeSourceBuilder()
390
+ .set_streams(
391
+ [
392
+ _MockStream(
393
+ [
394
+ ({"from": 0, "to": 1}, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 1}]),
395
+ ({"from": 1, "to": 2}, [{"id": "3", "cursor_field": 2}, {"id": "4", "cursor_field": 3}]),
396
+ ],
397
+ "stream1",
398
+ json_schema={
399
+ "type": "object",
400
+ "properties": {
401
+ "id": {"type": ["null", "string"]},
402
+ },
403
+ },
404
+ )
405
+ ]
406
+ )
407
+ .set_incremental(CursorField("cursor_field"), ("from", "to"))
408
+ )
409
+ .set_expected_records(
410
+ [
411
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
412
+ {"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
413
+ {"stream1": {"slices": [{"start": 0, "end": 1}]}},
414
+ {"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
415
+ {"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
416
+ {"stream1": {"slices": [{"start": 0, "end": 1}, {"start": 1, "end": 2}]}},
417
+ ]
418
+ )
419
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
420
+ .set_incremental_scenario_config(
421
+ IncrementalScenarioConfig(
422
+ input_state=[],
423
+ )
424
+ )
425
+ .build()
426
+ )
427
+
428
+
429
+ _NO_SLICE_BOUNDARIES = None
430
+ test_incremental_stream_without_slice_boundaries = (
431
+ TestScenarioBuilder()
432
+ .set_name("test_incremental_stream_without_slice_boundaries")
433
+ .set_config({})
434
+ .set_source_builder(
435
+ StreamFacadeSourceBuilder()
436
+ .set_streams(
437
+ [
438
+ _MockStream(
439
+ [
440
+ (None, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 3}]),
441
+ ],
442
+ "stream1",
443
+ json_schema={
444
+ "type": "object",
445
+ "properties": {
446
+ "id": {"type": ["null", "string"]},
447
+ },
448
+ },
449
+ )
450
+ ]
451
+ )
452
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
453
+ )
454
+ .set_expected_records(
455
+ [
456
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
457
+ {"data": {"id": "2", "cursor_field": 3}, "stream": "stream1"},
458
+ {"stream1": {"slices": [{"start": 0, "end": 3}]}},
459
+ ]
460
+ )
461
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
462
+ .set_incremental_scenario_config(
463
+ IncrementalScenarioConfig(
464
+ input_state=[],
465
+ )
466
+ )
467
+ .build()
468
+ )
469
+
470
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries = (
471
+ TestScenarioBuilder()
472
+ .set_name("test_incremental_stream_with_many_slices_byt_without_slice_boundaries")
473
+ .set_config({})
474
+ .set_source_builder(
475
+ StreamFacadeSourceBuilder()
476
+ .set_streams(
477
+ [
478
+ _MockStream(
479
+ [
480
+ ({"parent_id": 1}, [{"id": "1", "cursor_field": 0}]),
481
+ ({"parent_id": 309}, [{"id": "3", "cursor_field": 0}]),
482
+ ],
483
+ "stream1",
484
+ json_schema={
485
+ "type": "object",
486
+ "properties": {
487
+ "id": {"type": ["null", "string"]},
488
+ },
489
+ },
490
+ )
491
+ ]
492
+ )
493
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
494
+ )
495
+ .set_expected_read_error(ValueError, "test exception")
496
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
497
+ .set_incremental_scenario_config(
498
+ IncrementalScenarioConfig(
499
+ input_state=[],
500
+ )
501
+ )
502
+ .build()
503
+ )