airbyte-cdk 6.61.6__py3-none-any.whl → 6.62.0.dev1__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Files changed (36)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +7 -7
  2. airbyte_cdk/connector_builder/main.py +2 -2
  3. airbyte_cdk/connector_builder/test_reader/reader.py +2 -2
  4. airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +4 -2
  5. airbyte_cdk/manifest_server/Dockerfile +2 -2
  6. airbyte_cdk/manifest_server/README.md +0 -22
  7. airbyte_cdk/manifest_server/app.py +0 -6
  8. airbyte_cdk/manifest_server/cli/_common.py +0 -1
  9. airbyte_cdk/manifest_server/command_processor/processor.py +5 -2
  10. airbyte_cdk/manifest_server/command_processor/utils.py +1 -1
  11. airbyte_cdk/manifest_server/routers/manifest.py +1 -1
  12. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +6 -7
  13. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +57 -7
  14. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +4 -2
  15. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +208 -278
  16. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +0 -6
  17. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -5
  18. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -6
  19. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +1 -23
  20. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -6
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +88 -107
  22. airbyte_cdk/sources/declarative/requesters/request_options/per_partition_request_option_provider.py +95 -0
  23. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +4 -1
  24. airbyte_cdk/sources/declarative/retrievers/retriever.py +5 -0
  25. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -21
  26. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -1
  27. airbyte_cdk/sources/message/repository.py +20 -0
  28. airbyte_cdk/sources/utils/schema_helpers.py +9 -29
  29. airbyte_cdk/sources/utils/transform.py +13 -25
  30. airbyte_cdk/utils/spec_schema_transformations.py +5 -7
  31. {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/METADATA +2 -4
  32. {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/RECORD +36 -35
  33. {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/LICENSE.txt +0 -0
  34. {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/LICENSE_SHORT +0 -0
  35. {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/entry_points.txt +0 -0
@@ -162,12 +162,6 @@ class CartesianProductStreamSlicer(PartitionRouter):
             partition=partition, cursor_slice=cursor_slice, extra_fields=extra_fields
         )
 
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """
-        Parent stream states are not supported for cartesian product stream slicer
-        """
-        pass
-
     def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
         """
         Parent stream states are not supported for cartesian product stream slicer
@@ -140,11 +140,6 @@ class GroupingPartitionRouter(PartitionRouter):
     ) -> Mapping[str, Any]:
         return {}
 
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """Delegate state initialization to the underlying partition router."""
-        self.underlying_partition_router.set_initial_state(stream_state)
-        self._state = self.underlying_partition_router.get_stream_state()
-
     def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
         """Delegate state retrieval to the underlying partition router."""
         return self._state
@@ -108,12 +108,6 @@ class ListPartitionRouter(PartitionRouter):
         else:
             return {}
 
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """
-        ListPartitionRouter doesn't have parent streams
-        """
-        pass
-
     def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
         """
         ListPartitionRouter doesn't have parent streams
@@ -15,31 +15,9 @@ class PartitionRouter(StreamSlicer):
     """
     Base class for partition routers.
     Methods:
-        set_parent_state(stream_state): Set the state of the parent streams.
-        get_parent_state(): Get the state of the parent streams.
+        get_stream_state(): Get the state of the parent streams.
     """
 
-    @abstractmethod
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """
-        Set the state of the parent streams.
-
-        This method should only be implemented if the slicer is based on some parent stream and needs to read this stream
-        incrementally using the state.
-
-        Args:
-            stream_state (StreamState): The state of the streams to be set. The expected format is a dictionary that includes
-                'parent_state' which is a dictionary of parent state names to their corresponding state.
-                Example:
-                    {
-                        "parent_state": {
-                            "parent_stream_name_1": { ... },
-                            "parent_stream_name_2": { ... },
-                            ...
-                        }
-                    }
-        """
-
     @abstractmethod
     def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
         """
@@ -50,12 +50,6 @@ class SinglePartitionRouter(PartitionRouter):
     def stream_slices(self) -> Iterable[StreamSlice]:
         yield StreamSlice(partition={}, cursor_slice={})
 
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """
-        SinglePartitionRouter doesn't have parent streams
-        """
-        pass
-
     def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
         """
         SinglePartitionRouter doesn't have parent streams
@@ -7,24 +7,51 @@ import copy
 import json
 import logging
 from dataclasses import InitVar, dataclass
-from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Iterable,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    TypeVar,
+    Union,
+)
 
 import dpath
 import requests
 
 from airbyte_cdk.models import AirbyteMessage
-from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
 from airbyte_cdk.sources.declarative.requesters.request_option import (
     RequestOption,
     RequestOptionType,
 )
-from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
-from airbyte_cdk.utils import AirbyteTracedException
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 
 if TYPE_CHECKING:
-    from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
+    from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+
+
+T = TypeVar("T")
+
+
+def iterate_with_last_flag(generator: Iterable[T]) -> Iterable[tuple[T, bool]]:
+    iterator = iter(generator)
+
+    try:
+        current = next(iterator)
+    except StopIteration:
+        return  # Return an empty iterator
+
+    for next_item in iterator:
+        yield current, False
+        current = next_item
+
+    yield current, True
 
 
 @dataclass
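
The stream_slices rewrite below leans on this new helper twice, once per partition and once per record, so its contract is worth spelling out: each yielded item is paired with a flag that is True only for the final element. A minimal sketch of that behavior:

    pairs = list(iterate_with_last_flag(["a", "b", "c"]))
    # [("a", False), ("b", False), ("c", True)]
    assert list(iterate_with_last_flag([])) == []  # an empty input yields nothing
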
@@ -40,7 +67,7 @@ class ParentStreamConfig:
         incremental_dependency (bool): Indicates if the parent stream should be read incrementally.
     """
 
-    stream: "DeclarativeStream"  # Parent streams must be DeclarativeStream because we can't know which part of the stream slice is a partition for regular Stream
+    stream: "AbstractStream"
     parent_key: Union[InterpolatedString, str]
     partition_field: Union[InterpolatedString, str]
     config: Config
@@ -176,59 +203,65 @@ class SubstreamPartitionRouter(PartitionRouter):
                 for field_path in parent_stream_config.extra_fields
             ]
 
-            # read_stateless() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does
-            # not support either substreams or RFR, but something that needs to be considered once we do
-            for parent_record in parent_stream.read_only_records():
-                parent_partition = None
-                # Skip non-records (eg AirbyteLogMessage)
-                if isinstance(parent_record, AirbyteMessage):
-                    self.logger.warning(
-                        f"Parent stream {parent_stream.name} returns records of type AirbyteMessage. This SubstreamPartitionRouter is not able to checkpoint incremental parent state."
-                    )
-                    if parent_record.type == MessageType.RECORD:
-                        parent_record = parent_record.record.data  # type: ignore[union-attr, assignment]  # record is always a Record
-                    else:
-                        continue
-                elif isinstance(parent_record, Record):
+            for partition, is_last_slice in iterate_with_last_flag(
+                parent_stream.generate_partitions()
+            ):
+                for parent_record, is_last_record_in_slice in iterate_with_last_flag(
+                    partition.read()
+                ):
+                    # In the previous CDK implementation, state management was done internally by the stream.
+                    # However, this could cause issues when doing availability check for example as the availability
+                    # check would progress the state so state management was moved outside of the read method.
+                    # Hence, we need to call the cursor here.
+                    # Note that we call observe and close_partition before emitting the associated record as the
+                    # ConcurrentPerPartitionCursor will associate a record with the state of the stream after the
+                    # record was consumed.
+                    parent_stream.cursor.observe(parent_record)
                     parent_partition = (
                         parent_record.associated_slice.partition
                         if parent_record.associated_slice
                         else {}
                     )
-                    parent_record = parent_record.data
-                elif not isinstance(parent_record, Mapping):
-                    # The parent_record should only take the form of a Record, AirbyteMessage, or Mapping. Anything else is invalid
-                    raise AirbyteTracedException(
-                        message=f"Parent stream returned records as invalid type {type(parent_record)}"
+                    record_data = parent_record.data
+
+                    try:
+                        partition_value = dpath.get(
+                            record_data,  # type: ignore [arg-type]
+                            parent_field,
+                        )
+                    except KeyError:
+                        # FIXME a log here would go a long way for debugging
+                        continue
+
+                    # Add extra fields
+                    extracted_extra_fields = self._extract_extra_fields(
+                        record_data, extra_fields
                     )
-                try:
-                    partition_value = dpath.get(
-                        parent_record,  # type: ignore [arg-type]
-                        parent_field,
+
+                    if parent_stream_config.lazy_read_pointer:
+                        extracted_extra_fields = {
+                            "child_response": self._extract_child_response(
+                                record_data,
+                                parent_stream_config.lazy_read_pointer,  # type: ignore[arg-type]  # lazy_read_pointer type handeled in __post_init__ of parent_stream_config
+                            ),
+                            **extracted_extra_fields,
+                        }
+
+                    if is_last_record_in_slice:
+                        parent_stream.cursor.close_partition(partition)
+                        if is_last_slice:
+                            parent_stream.cursor.ensure_at_least_one_state_emitted()
+
+                    yield StreamSlice(
+                        partition={
+                            partition_field: partition_value,
+                            "parent_slice": parent_partition or {},
+                        },
+                        cursor_slice={},
+                        extra_fields=extracted_extra_fields,
                     )
-                except KeyError:
-                    continue
-
-                # Add extra fields
-                extracted_extra_fields = self._extract_extra_fields(parent_record, extra_fields)
-
-                if parent_stream_config.lazy_read_pointer:
-                    extracted_extra_fields = {
-                        "child_response": self._extract_child_response(
-                            parent_record,
-                            parent_stream_config.lazy_read_pointer,  # type: ignore[arg-type]  # lazy_read_pointer type handeled in __post_init__ of parent_stream_config
-                        ),
-                        **extracted_extra_fields,
-                    }
-
-                yield StreamSlice(
-                    partition={
-                        partition_field: partition_value,
-                        "parent_slice": parent_partition or {},
-                    },
-                    cursor_slice={},
-                    extra_fields=extracted_extra_fields,
-                )
+
+        yield from []
 
     def _extract_child_response(
         self, parent_record: Mapping[str, Any] | AirbyteMessage, pointer: List[InterpolatedString]
@@ -278,60 +311,6 @@ class SubstreamPartitionRouter(PartitionRouter):
                 extracted_extra_fields[".".join(extra_field_path)] = extra_field_value
         return extracted_extra_fields
 
-    def set_initial_state(self, stream_state: StreamState) -> None:
-        """
-        Set the state of the parent streams.
-
-        If the `parent_state` key is missing from `stream_state`, migrate the child stream state to the parent stream's state format.
-        This migration applies only to parent streams with incremental dependencies.
-
-        Args:
-            stream_state (StreamState): The state of the streams to be set.
-
-        Example of state format:
-        {
-            "parent_state": {
-                "parent_stream_name1": {
-                    "last_updated": "2023-05-27T00:00:00Z"
-                },
-                "parent_stream_name2": {
-                    "last_updated": "2023-05-27T00:00:00Z"
-                }
-            }
-        }
-
-        Example of migrating to parent state format:
-        - Initial state:
-        {
-            "updated_at": "2023-05-27T00:00:00Z"
-        }
-        - After migration:
-        {
-            "updated_at": "2023-05-27T00:00:00Z",
-            "parent_state": {
-                "parent_stream_name": {
-                    "parent_stream_cursor": "2023-05-27T00:00:00Z"
-                }
-            }
-        }
-        """
-        if not stream_state:
-            return
-
-        parent_state = stream_state.get("parent_state", {})
-
-        # Set state for each parent stream with an incremental dependency
-        for parent_config in self.parent_stream_configs:
-            if (
-                not parent_state.get(parent_config.stream.name, {})
-                and parent_config.incremental_dependency
-            ):
-                # Migrate child state to parent state format
-                parent_state = self._migrate_child_state_to_parent_state(stream_state)
-
-            if parent_config.incremental_dependency:
-                parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
-
     def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
         """
         Migrate the child or global stream state into the parent stream's state format.
@@ -414,7 +393,9 @@ class SubstreamPartitionRouter(PartitionRouter):
         parent_state = {}
         for parent_config in self.parent_stream_configs:
             if parent_config.incremental_dependency:
-                parent_state[parent_config.stream.name] = copy.deepcopy(parent_config.stream.state)
+                parent_state[parent_config.stream.name] = copy.deepcopy(
+                    parent_config.stream.cursor.state
+                )
         return parent_state
 
     @property
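
For reference, the mapping built here keeps the shape that the removed set_initial_state docstring documented, now sourced from each parent's cursor instead of the stream itself. Illustrative values only:

    # Shape of the parents' state, per the docstring removed above:
    parent_state = {
        "parent_stream_name1": {"last_updated": "2023-05-27T00:00:00Z"},
        "parent_stream_name2": {"last_updated": "2023-05-27T00:00:00Z"},
    }
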
@@ -0,0 +1,95 @@
+from typing import Any, Mapping, Optional, Union
+
+from airbyte_cdk.sources.declarative.partition_routers import PartitionRouter
+from airbyte_cdk.sources.declarative.requesters.request_options import RequestOptionsProvider
+from airbyte_cdk.sources.types import StreamSlice, StreamState
+
+
+class PerPartitionRequestOptionsProvider(RequestOptionsProvider):
+    def __init__(self, partition_router: PartitionRouter, cursor_provider: RequestOptionsProvider):
+        self._partition_router = partition_router
+        self._cursor_provider = cursor_provider
+
+    def get_request_params(
+        self,
+        *,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return self._partition_router.get_request_params(  # type: ignore  # this always returns a mapping
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
+            if stream_slice
+            else StreamSlice(partition={}, cursor_slice={}),
+            next_page_token=next_page_token,
+        ) | self._cursor_provider.get_request_params(
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
+            if stream_slice
+            else StreamSlice(partition={}, cursor_slice={}),
+            next_page_token=next_page_token,
+        )
+
+    def get_request_headers(
+        self,
+        *,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return self._partition_router.get_request_headers(  # type: ignore  # this always returns a mapping
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
+            if stream_slice
+            else stream_slice,
+            next_page_token=next_page_token,
+        ) | self._cursor_provider.get_request_headers(
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
+            if stream_slice
+            else stream_slice,
+            next_page_token=next_page_token,
+        )
+
+    def get_request_body_data(
+        self,
+        *,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Union[Mapping[str, Any], str]:
+        return self._partition_router.get_request_body_data(  # type: ignore  # this always returns a mapping
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
+            if stream_slice
+            else stream_slice,
+            next_page_token=next_page_token,
+        ) | self._cursor_provider.get_request_body_data(
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
+            if stream_slice
+            else stream_slice,
+            next_page_token=next_page_token,
+        )
+
+    def get_request_body_json(
+        self,
+        *,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return self._partition_router.get_request_body_json(  # type: ignore  # this always returns a mapping
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
+            if stream_slice
+            else stream_slice,
+            next_page_token=next_page_token,
+        ) | self._cursor_provider.get_request_body_json(
+            stream_state=stream_state,
+            stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
+            if stream_slice
+            else stream_slice,
+            next_page_token=next_page_token,
+        )
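
The pattern in this new file: split the incoming slice into a partition-only view for the partition router and a cursor-only view for the cursor's provider, then merge the two mappings with the dict-union operator (|), so the cursor's values win on key collisions. Note that only get_request_params falls back to an empty StreamSlice when stream_slice is None; the other three methods pass the None through. A sketch of the slice splitting, using the StreamSlice constructor from the code above with illustrative values:

    from airbyte_cdk.sources.types import StreamSlice

    # A slice carrying both a partition and a cursor window (illustrative values).
    stream_slice = StreamSlice(
        partition={"parent_id": "42"},
        cursor_slice={"start": "2024-01-01", "end": "2024-01-31"},
    )

    # The provider forwards each collaborator only the half it owns:
    partition_view = StreamSlice(partition=stream_slice.partition, cursor_slice={})
    cursor_view = StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
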
@@ -17,6 +17,7 @@ from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
 )
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.source import ExperimentalClassWarning
+from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
 from airbyte_cdk.sources.types import Config
 
 
@@ -28,12 +29,14 @@ class HttpComponentsResolver(ComponentsResolver):
 
     Attributes:
         retriever (Retriever): The retriever used to fetch data from an API.
+        stream_slicer (StreamSlicer): The how the data is sliced.
         config (Config): Configuration object for the resolver.
         components_mapping (List[ComponentMappingDefinition]): List of mappings to resolve.
         parameters (InitVar[Mapping[str, Any]]): Additional parameters for interpolation.
     """
 
     retriever: Retriever
+    stream_slicer: StreamSlicer
     config: Config
     components_mapping: List[ComponentMappingDefinition]
     parameters: InitVar[Mapping[str, Any]]
@@ -88,7 +91,7 @@ class HttpComponentsResolver(ComponentsResolver):
         """
         kwargs = {"stream_template_config": stream_template_config}
 
-        for stream_slice in self.retriever.stream_slices():
+        for stream_slice in self.stream_slicer.stream_slices():
             for components_values in self.retriever.read_records(
                 records_schema={}, stream_slice=stream_slice
             ):
@@ -5,6 +5,8 @@
 from abc import abstractmethod
 from typing import Any, Iterable, Mapping, Optional
 
+from typing_extensions import deprecated
+
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.types import StreamSlice, StreamState
 
@@ -29,11 +31,13 @@ class Retriever:
     """
 
     @abstractmethod
+    @deprecated("Stream slicing is being moved to the stream level.")
    def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
        """Returns the stream slices"""

    @property
    @abstractmethod
+    @deprecated("State management is being moved to the stream level.")
    def state(self) -> StreamState:
        """State getter, should return state in form that can serialized to a string and send to the output
        as a STATE AirbyteMessage.
@@ -49,5 +53,6 @@ class Retriever:

    @state.setter
    @abstractmethod
+    @deprecated("State management is being moved to the stream level.")
    def state(self, value: StreamState) -> None:
        """State setter, accept state serialized by state getter."""
@@ -14,21 +14,10 @@ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
 from airbyte_cdk.sources.types import Record, StreamSlice
 from airbyte_cdk.utils.slice_hasher import SliceHasher
 
-
 # For Connector Builder test read operations, we track the total number of records
-# read for the stream so that we can stop reading early if we exceed the record limit.
-class RecordCounter:
-    def __init__(self) -> None:
-        self.total_record_counter = 0
-
-    def increment(self) -> None:
-        self.total_record_counter += 1
-
-    def reset(self) -> None:
-        self.total_record_counter = 0
-
-    def get_total_records(self) -> int:
-        return self.total_record_counter
+# read for the stream at the global level so that we can stop reading early if we
+# exceed the record limit
+total_record_counter = 0
 
 
 class SchemaLoaderCachingDecorator(SchemaLoader):
@@ -62,7 +51,6 @@ class DeclarativePartitionFactory:
         self._retriever = retriever
         self._message_repository = message_repository
         self._max_records_limit = max_records_limit
-        self._record_counter = RecordCounter()
 
     def create(self, stream_slice: StreamSlice) -> Partition:
         return DeclarativePartition(
@@ -72,7 +60,6 @@
             message_repository=self._message_repository,
             max_records_limit=self._max_records_limit,
             stream_slice=stream_slice,
-            record_counter=self._record_counter,
         )
 
 
@@ -85,7 +72,6 @@ class DeclarativePartition(Partition):
         message_repository: MessageRepository,
         max_records_limit: Optional[int],
         stream_slice: StreamSlice,
-        record_counter: RecordCounter,
     ):
         self._stream_name = stream_name
         self._schema_loader = schema_loader
@@ -94,17 +80,17 @@
         self._max_records_limit = max_records_limit
         self._stream_slice = stream_slice
         self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
-        self._record_counter = record_counter
 
     def read(self) -> Iterable[Record]:
         if self._max_records_limit is not None:
-            if self._record_counter.get_total_records() >= self._max_records_limit:
+            global total_record_counter
+            if total_record_counter >= self._max_records_limit:
                 return
         for stream_data in self._retriever.read_records(
             self._schema_loader.get_json_schema(), self._stream_slice
         ):
             if self._max_records_limit is not None:
-                if self._record_counter.get_total_records() >= self._max_records_limit:
+                if total_record_counter >= self._max_records_limit:
                     break
 
             if isinstance(stream_data, Mapping):
@@ -122,7 +108,7 @@
                 self._message_repository.emit_message(stream_data)
 
             if self._max_records_limit is not None:
-                self._record_counter.increment()
+                total_record_counter += 1
 
     def to_slice(self) -> Optional[Mapping[str, Any]]:
         return self._stream_slice
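
A behavioral consequence of this hunk worth noting: the injected RecordCounter was scoped to one DeclarativePartitionFactory, while the module-level total_record_counter is shared by every DeclarativePartition in the process, and resetting it between test reads means reassigning the module attribute. A sketch under that assumption:

    # Sketch: a module-level counter is shared by all importers of the module.
    import airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator as gen

    gen.total_record_counter = 0  # reset before a Connector Builder test read
    # ... run the read; every DeclarativePartition increments the same counter ...
    print(gen.total_record_counter)  # records emitted across all partitions
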
@@ -14,7 +14,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
 )
 from airbyte_cdk.sources.types import ConnectionDefinition
 
-class YamlDeclarativeSource(ConcurrentDeclarativeSource):
+class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage]]):
     """Declarative source defined by a yaml file"""
 
     def __init__(
@@ -95,6 +95,26 @@ class InMemoryMessageRepository(MessageRepository):
             yield self._message_queue.popleft()
 
 
+class StateFilteringMessageRepository(MessageRepository):
+    """
+    This message repository is used when creating parent streams for SubstreamPartitionRouter. As the child stream
+    manages the state for both the child and the parents, we want to prevent parents from emitting state messages.
+    """
+
+    def __init__(self, decorated: MessageRepository) -> None:
+        self._decorated = decorated
+
+    def emit_message(self, message: AirbyteMessage) -> None:
+        if message.type != Type.STATE:
+            self._decorated.emit_message(message)
+
+    def log_message(self, level: Level, message_provider: Callable[[], LogMessage]) -> None:
+        self._decorated.log_message(level, message_provider)
+
+    def consume_queue(self) -> Iterable[AirbyteMessage]:
+        yield from self._decorated.consume_queue()
+
+
 class LogAppenderMessageRepositoryDecorator(MessageRepository):
     def __init__(
         self,
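
To make the new decorator concrete, a minimal sketch of the filtering (assuming InMemoryMessageRepository from the same module and default-constructible protocol models):

    from airbyte_cdk.models import AirbyteMessage, AirbyteStateMessage, Type
    from airbyte_cdk.sources.message.repository import (
        InMemoryMessageRepository,
        StateFilteringMessageRepository,
    )

    repository = StateFilteringMessageRepository(InMemoryMessageRepository())

    # STATE messages are dropped so parent streams cannot checkpoint on their own;
    # any other message type passes through to the decorated repository.
    repository.emit_message(AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage()))
    assert list(repository.consume_queue()) == []
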