airbyte-cdk 6.8.1rc10__py3-none-any.whl → 6.8.2.dev1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-cdk might be problematic. Click here for more details.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +87 -25
 - airbyte_cdk/sources/declarative/declarative_component_schema.yaml +2 -100
 - airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
 - airbyte_cdk/sources/declarative/incremental/__init__.py +3 -0
 - airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +270 -0
 - airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +9 -0
 - airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -53
 - airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2 -95
 - airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -6
 - airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +87 -106
 - airbyte_cdk/sources/declarative/partition_routers/__init__.py +1 -2
 - airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
 - airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +0 -1
 - airbyte_cdk/sources/streams/concurrent/cursor.py +9 -0
 - {airbyte_cdk-6.8.1rc10.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/METADATA +1 -1
 - {airbyte_cdk-6.8.1rc10.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/RECORD +19 -21
 - airbyte_cdk/sources/declarative/resolvers/__init__.py +0 -13
 - airbyte_cdk/sources/declarative/resolvers/components_resolver.py +0 -55
 - airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +0 -106
 - {airbyte_cdk-6.8.1rc10.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/LICENSE.txt +0 -0
 - {airbyte_cdk-6.8.1rc10.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/WHEEL +0 -0
 - {airbyte_cdk-6.8.1rc10.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/entry_points.txt +0 -0
 
| 
         @@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import ( 
     | 
|
| 
       20 
20 
     | 
    
         
             
                ClientSideIncrementalRecordFilterDecorator,
         
     | 
| 
       21 
21 
     | 
    
         
             
            )
         
     | 
| 
       22 
22 
     | 
    
         
             
            from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
         
     | 
| 
      
 23 
     | 
    
         
            +
            from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
         
     | 
| 
      
 24 
     | 
    
         
            +
                PerPartitionWithGlobalCursor,
         
     | 
| 
      
 25 
     | 
    
         
            +
            )
         
     | 
| 
       23 
26 
     | 
    
         
             
            from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
         
     | 
| 
       24 
27 
     | 
    
         
             
            from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
         
     | 
| 
       25 
28 
     | 
    
         
             
            from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
         
     | 
| 
         @@ -86,10 +89,23 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       86 
89 
     | 
    
         
             
                        component_factory=component_factory,
         
     | 
| 
       87 
90 
     | 
    
         
             
                    )
         
     | 
| 
       88 
91 
     | 
    
         | 
| 
       89 
     | 
    
         
            -
                    # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
         
     | 
| 
       90 
     | 
    
         
            -
                    #  no longer needs to store the original incoming state. But maybe there's an edge case?
         
     | 
| 
       91 
92 
     | 
    
         
             
                    self._state = state
         
     | 
| 
       92 
93 
     | 
    
         | 
| 
      
 94 
     | 
    
         
            +
                    self._concurrent_streams: Optional[List[AbstractStream]]
         
     | 
| 
      
 95 
     | 
    
         
            +
                    self._synchronous_streams: Optional[List[Stream]]
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
      
 97 
     | 
    
         
            +
                    # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
         
     | 
| 
      
 98 
     | 
    
         
            +
                    # they might depend on it. Ideally we want to have a static method on this class to get the spec without
         
     | 
| 
      
 99 
     | 
    
         
            +
                    # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
         
     | 
| 
      
 100 
     | 
    
         
            +
                    # for our future improvements to the CDK.
         
     | 
| 
      
 101 
     | 
    
         
            +
                    if config:
         
     | 
| 
      
 102 
     | 
    
         
            +
                        self._concurrent_streams, self._synchronous_streams = self._group_streams(
         
     | 
| 
      
 103 
     | 
    
         
            +
                            config=config or {}
         
     | 
| 
      
 104 
     | 
    
         
            +
                        )
         
     | 
| 
      
 105 
     | 
    
         
            +
                    else:
         
     | 
| 
      
 106 
     | 
    
         
            +
                        self._concurrent_streams = None
         
     | 
| 
      
 107 
     | 
    
         
            +
                        self._synchronous_streams = None
         
     | 
| 
      
 108 
     | 
    
         
            +
             
     | 
| 
       93 
109 
     | 
    
         
             
                    concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         
     | 
| 
       94 
110 
     | 
    
         
             
                    if concurrency_level_from_manifest:
         
     | 
| 
       95 
111 
     | 
    
         
             
                        concurrency_level_component = self._constructor.create_component(
         
     | 
| 
         @@ -123,20 +139,17 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       123 
139 
     | 
    
         
             
                    logger: logging.Logger,
         
     | 
| 
       124 
140 
     | 
    
         
             
                    config: Mapping[str, Any],
         
     | 
| 
       125 
141 
     | 
    
         
             
                    catalog: ConfiguredAirbyteCatalog,
         
     | 
| 
       126 
     | 
    
         
            -
                    state: Optional[List[AirbyteStateMessage]] = None,
         
     | 
| 
      
 142 
     | 
    
         
            +
                    state: Optional[Union[List[AirbyteStateMessage]]] = None,
         
     | 
| 
       127 
143 
     | 
    
         
             
                ) -> Iterator[AirbyteMessage]:
         
     | 
| 
       128 
     | 
    
         
            -
                     
     | 
| 
       129 
     | 
    
         
            -
             
     | 
| 
       130 
     | 
    
         
            -
                     
     | 
| 
       131 
     | 
    
         
            -
                    # the concurrent streams must be saved so that they can be removed from the catalog before starting
         
     | 
| 
       132 
     | 
    
         
            -
                    # synchronous streams
         
     | 
| 
       133 
     | 
    
         
            -
                    if len(concurrent_streams) > 0:
         
     | 
| 
      
 144 
     | 
    
         
            +
                    # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
         
     | 
| 
      
 145 
     | 
    
         
            +
                    # streams must be saved so that they can be removed from the catalog before starting synchronous streams
         
     | 
| 
      
 146 
     | 
    
         
            +
                    if self._concurrent_streams:
         
     | 
| 
       134 
147 
     | 
    
         
             
                        concurrent_stream_names = set(
         
     | 
| 
       135 
     | 
    
         
            -
                            [concurrent_stream.name for concurrent_stream in  
     | 
| 
      
 148 
     | 
    
         
            +
                            [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
         
     | 
| 
       136 
149 
     | 
    
         
             
                        )
         
     | 
| 
       137 
150 
     | 
    
         | 
| 
       138 
151 
     | 
    
         
             
                        selected_concurrent_streams = self._select_streams(
         
     | 
| 
       139 
     | 
    
         
            -
                            streams= 
     | 
| 
      
 152 
     | 
    
         
            +
                            streams=self._concurrent_streams, configured_catalog=catalog
         
     | 
| 
       140 
153 
     | 
    
         
             
                        )
         
     | 
| 
       141 
154 
     | 
    
         
             
                        # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
         
     | 
| 
       142 
155 
     | 
    
         
             
                        # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
         
     | 
| 
         @@ -155,7 +168,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       155 
168 
     | 
    
         
             
                    yield from super().read(logger, config, filtered_catalog, state)
         
     | 
| 
       156 
169 
     | 
    
         | 
| 
       157 
170 
     | 
    
         
             
                def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
         
     | 
| 
       158 
     | 
    
         
            -
                    concurrent_streams 
     | 
| 
      
 171 
     | 
    
         
            +
                    concurrent_streams = self._concurrent_streams or []
         
     | 
| 
      
 172 
     | 
    
         
            +
                    synchronous_streams = self._synchronous_streams or []
         
     | 
| 
       159 
173 
     | 
    
         
             
                    return AirbyteCatalog(
         
     | 
| 
       160 
174 
     | 
    
         
             
                        streams=[
         
     | 
| 
       161 
175 
     | 
    
         
             
                            stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
         
     | 
| 
         @@ -181,13 +195,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       181 
195 
     | 
    
         | 
| 
       182 
196 
     | 
    
         
             
                    state_manager = ConnectorStateManager(state=self._state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
         
     | 
| 
       183 
197 
     | 
    
         | 
| 
       184 
     | 
    
         
            -
                     
     | 
| 
       185 
     | 
    
         
            -
             
     | 
| 
       186 
     | 
    
         
            -
                     
     | 
| 
       187 
     | 
    
         
            -
                        self._source_config, config
         
     | 
| 
       188 
     | 
    
         
            -
                    )
         
     | 
| 
       189 
     | 
    
         
            -
             
     | 
| 
       190 
     | 
    
         
            -
                    name_to_stream_mapping = {stream["name"]: stream for stream in streams}
         
     | 
| 
      
 198 
     | 
    
         
            +
                    name_to_stream_mapping = {
         
     | 
| 
      
 199 
     | 
    
         
            +
                        stream["name"]: stream for stream in self.resolved_manifest["streams"]
         
     | 
| 
      
 200 
     | 
    
         
            +
                    }
         
     | 
| 
       191 
201 
     | 
    
         | 
| 
       192 
202 
     | 
    
         
             
                    for declarative_stream in self.streams(config=config):
         
     | 
| 
       193 
203 
     | 
    
         
             
                        # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
         
     | 
| 
         @@ -195,7 +205,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       195 
205 
     | 
    
         
             
                        # so we need to treat them as synchronous
         
     | 
| 
       196 
206 
     | 
    
         
             
                        if (
         
     | 
| 
       197 
207 
     | 
    
         
             
                            isinstance(declarative_stream, DeclarativeStream)
         
     | 
| 
       198 
     | 
    
         
            -
                            and name_to_stream_mapping[declarative_stream.name] 
     | 
| 
      
 208 
     | 
    
         
            +
                            and name_to_stream_mapping[declarative_stream.name].get("retriever")["type"]
         
     | 
| 
       199 
209 
     | 
    
         
             
                            == "SimpleRetriever"
         
     | 
| 
       200 
210 
     | 
    
         
             
                        ):
         
     | 
| 
       201 
211 
     | 
    
         
             
                            incremental_sync_component_definition = name_to_stream_mapping[
         
     | 
| 
         @@ -204,7 +214,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       204 
214 
     | 
    
         | 
| 
       205 
215 
     | 
    
         
             
                            partition_router_component_definition = (
         
     | 
| 
       206 
216 
     | 
    
         
             
                                name_to_stream_mapping[declarative_stream.name]
         
     | 
| 
       207 
     | 
    
         
            -
                                .get("retriever" 
     | 
| 
      
 217 
     | 
    
         
            +
                                .get("retriever")
         
     | 
| 
       208 
218 
     | 
    
         
             
                                .get("partition_router")
         
     | 
| 
       209 
219 
     | 
    
         
             
                            )
         
     | 
| 
       210 
220 
     | 
    
         
             
                            is_without_partition_router_or_cursor = not bool(
         
     | 
| 
         @@ -226,7 +236,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       226 
236 
     | 
    
         
             
                                cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
         
     | 
| 
       227 
237 
     | 
    
         
             
                                    state_manager=state_manager,
         
     | 
| 
       228 
238 
     | 
    
         
             
                                    model_type=DatetimeBasedCursorModel,
         
     | 
| 
       229 
     | 
    
         
            -
                                    component_definition=incremental_sync_component_definition, 
     | 
| 
      
 239 
     | 
    
         
            +
                                    component_definition=incremental_sync_component_definition,
         
     | 
| 
       230 
240 
     | 
    
         
             
                                    stream_name=declarative_stream.name,
         
     | 
| 
       231 
241 
     | 
    
         
             
                                    stream_namespace=declarative_stream.namespace,
         
     | 
| 
       232 
242 
     | 
    
         
             
                                    config=config or {},
         
     | 
| 
         @@ -299,6 +309,59 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       299 
309 
     | 
    
         
             
                                        cursor=final_state_cursor,
         
     | 
| 
       300 
310 
     | 
    
         
             
                                    )
         
     | 
| 
       301 
311 
     | 
    
         
             
                                )
         
     | 
| 
      
 312 
     | 
    
         
            +
                            elif (
         
     | 
| 
      
 313 
     | 
    
         
            +
                                incremental_sync_component_definition
         
     | 
| 
      
 314 
     | 
    
         
            +
                                and incremental_sync_component_definition.get("type", "")
         
     | 
| 
      
 315 
     | 
    
         
            +
                                == DatetimeBasedCursorModel.__name__
         
     | 
| 
      
 316 
     | 
    
         
            +
                                and self._stream_supports_concurrent_partition_processing(
         
     | 
| 
      
 317 
     | 
    
         
            +
                                    declarative_stream=declarative_stream
         
     | 
| 
      
 318 
     | 
    
         
            +
                                )
         
     | 
| 
      
 319 
     | 
    
         
            +
                                and hasattr(declarative_stream.retriever, "stream_slicer")
         
     | 
| 
      
 320 
     | 
    
         
            +
                                and isinstance(declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor)
         
     | 
| 
      
 321 
     | 
    
         
            +
                            ):
         
     | 
| 
      
 322 
     | 
    
         
            +
                                stream_state = state_manager.get_stream_state(
         
     | 
| 
      
 323 
     | 
    
         
            +
                                    stream_name=declarative_stream.name, namespace=declarative_stream.namespace
         
     | 
| 
      
 324 
     | 
    
         
            +
                                )
         
     | 
| 
      
 325 
     | 
    
         
            +
                                partition_router = declarative_stream.retriever.stream_slicer._partition_router
         
     | 
| 
      
 326 
     | 
    
         
            +
             
     | 
| 
      
 327 
     | 
    
         
            +
                                cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
         
     | 
| 
      
 328 
     | 
    
         
            +
                                        state_manager=state_manager,
         
     | 
| 
      
 329 
     | 
    
         
            +
                                        model_type=DatetimeBasedCursorModel,
         
     | 
| 
      
 330 
     | 
    
         
            +
                                        component_definition=incremental_sync_component_definition,
         
     | 
| 
      
 331 
     | 
    
         
            +
                                        stream_name=declarative_stream.name,
         
     | 
| 
      
 332 
     | 
    
         
            +
                                        stream_namespace=declarative_stream.namespace,
         
     | 
| 
      
 333 
     | 
    
         
            +
                                        config=config or {},
         
     | 
| 
      
 334 
     | 
    
         
            +
                                        stream_state=stream_state,
         
     | 
| 
      
 335 
     | 
    
         
            +
                                        partition_router=partition_router,
         
     | 
| 
      
 336 
     | 
    
         
            +
                                    )
         
     | 
| 
      
 337 
     | 
    
         
            +
             
     | 
| 
      
 338 
     | 
    
         
            +
             
     | 
| 
      
 339 
     | 
    
         
            +
                                partition_generator = StreamSlicerPartitionGenerator(
         
     | 
| 
      
 340 
     | 
    
         
            +
                                    DeclarativePartitionFactory(
         
     | 
| 
      
 341 
     | 
    
         
            +
                                        declarative_stream.name,
         
     | 
| 
      
 342 
     | 
    
         
            +
                                        declarative_stream.get_json_schema(),
         
     | 
| 
      
 343 
     | 
    
         
            +
                                        self._retriever_factory(
         
     | 
| 
      
 344 
     | 
    
         
            +
                                            name_to_stream_mapping[declarative_stream.name],
         
     | 
| 
      
 345 
     | 
    
         
            +
                                            config,
         
     | 
| 
      
 346 
     | 
    
         
            +
                                            stream_state,
         
     | 
| 
      
 347 
     | 
    
         
            +
                                        ),
         
     | 
| 
      
 348 
     | 
    
         
            +
                                        self.message_repository,
         
     | 
| 
      
 349 
     | 
    
         
            +
                                    ),
         
     | 
| 
      
 350 
     | 
    
         
            +
                                    cursor,
         
     | 
| 
      
 351 
     | 
    
         
            +
                                )
         
     | 
| 
      
 352 
     | 
    
         
            +
             
     | 
| 
      
 353 
     | 
    
         
            +
                                concurrent_streams.append(
         
     | 
| 
      
 354 
     | 
    
         
            +
                                    DefaultStream(
         
     | 
| 
      
 355 
     | 
    
         
            +
                                        partition_generator=partition_generator,
         
     | 
| 
      
 356 
     | 
    
         
            +
                                        name=declarative_stream.name,
         
     | 
| 
      
 357 
     | 
    
         
            +
                                        json_schema=declarative_stream.get_json_schema(),
         
     | 
| 
      
 358 
     | 
    
         
            +
                                        availability_strategy=AlwaysAvailableAvailabilityStrategy(),
         
     | 
| 
      
 359 
     | 
    
         
            +
                                        primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
         
     | 
| 
      
 360 
     | 
    
         
            +
                                        cursor_field=cursor.cursor_field.cursor_field_key,
         
     | 
| 
      
 361 
     | 
    
         
            +
                                        logger=self.logger,
         
     | 
| 
      
 362 
     | 
    
         
            +
                                        cursor=cursor,
         
     | 
| 
      
 363 
     | 
    
         
            +
                                    )
         
     | 
| 
      
 364 
     | 
    
         
            +
                                )
         
     | 
| 
       302 
365 
     | 
    
         
             
                            else:
         
     | 
| 
       303 
366 
     | 
    
         
             
                                synchronous_streams.append(declarative_stream)
         
     | 
| 
       304 
367 
     | 
    
         
             
                        else:
         
     | 
| 
         @@ -309,11 +372,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): 
     | 
|
| 
       309 
372 
     | 
    
         
             
                def _is_datetime_incremental_without_partition_routing(
         
     | 
| 
       310 
373 
     | 
    
         
             
                    self,
         
     | 
| 
       311 
374 
     | 
    
         
             
                    declarative_stream: DeclarativeStream,
         
     | 
| 
       312 
     | 
    
         
            -
                    incremental_sync_component_definition: Mapping[str, Any] 
     | 
| 
      
 375 
     | 
    
         
            +
                    incremental_sync_component_definition: Mapping[str, Any],
         
     | 
| 
       313 
376 
     | 
    
         
             
                ) -> bool:
         
     | 
| 
       314 
377 
     | 
    
         
             
                    return (
         
     | 
| 
       315 
     | 
    
         
            -
                        incremental_sync_component_definition 
     | 
| 
       316 
     | 
    
         
            -
                        and bool(incremental_sync_component_definition)
         
     | 
| 
      
 378 
     | 
    
         
            +
                        bool(incremental_sync_component_definition)
         
     | 
| 
       317 
379 
     | 
    
         
             
                        and incremental_sync_component_definition.get("type", "")
         
     | 
| 
       318 
380 
     | 
    
         
             
                        == DatetimeBasedCursorModel.__name__
         
     | 
| 
       319 
381 
     | 
    
         
             
                        and self._stream_supports_concurrent_partition_processing(
         
     | 
| 
         @@ -7,12 +7,8 @@ version: 1.0.0 
     | 
|
| 
       7 
7 
     | 
    
         
             
            required:
         
     | 
| 
       8 
8 
     | 
    
         
             
              - type
         
     | 
| 
       9 
9 
     | 
    
         
             
              - check
         
     | 
| 
      
 10 
     | 
    
         
            +
              - streams
         
     | 
| 
       10 
11 
     | 
    
         
             
              - version
         
     | 
| 
       11 
     | 
    
         
            -
            anyOf:
         
     | 
| 
       12 
     | 
    
         
            -
              - required:
         
     | 
| 
       13 
     | 
    
         
            -
                  - streams
         
     | 
| 
       14 
     | 
    
         
            -
              - required:
         
     | 
| 
       15 
     | 
    
         
            -
                  - dynamic_streams
         
     | 
| 
       16 
12 
     | 
    
         
             
            properties:
         
     | 
| 
       17 
13 
     | 
    
         
             
              type:
         
     | 
| 
       18 
14 
     | 
    
         
             
                type: string
         
     | 
| 
         @@ -23,10 +19,6 @@ properties: 
     | 
|
| 
       23 
19 
     | 
    
         
             
                type: array
         
     | 
| 
       24 
20 
     | 
    
         
             
                items:
         
     | 
| 
       25 
21 
     | 
    
         
             
                  "$ref": "#/definitions/DeclarativeStream"
         
     | 
| 
       26 
     | 
    
         
            -
              dynamic_streams:
         
     | 
| 
       27 
     | 
    
         
            -
                type: array
         
     | 
| 
       28 
     | 
    
         
            -
                items:
         
     | 
| 
       29 
     | 
    
         
            -
                  "$ref": "#/definitions/DynamicDeclarativeStream"
         
     | 
| 
       30 
22 
     | 
    
         
             
              version:
         
     | 
| 
       31 
23 
     | 
    
         
             
                type: string
         
     | 
| 
       32 
24 
     | 
    
         
             
                description: The version of the Airbyte CDK used to build and test the source.
         
     | 
| 
         @@ -1329,7 +1321,7 @@ definitions: 
     | 
|
| 
       1329 
1321 
     | 
    
         
             
                    type: array
         
     | 
| 
       1330 
1322 
     | 
    
         
             
                    items:
         
     | 
| 
       1331 
1323 
     | 
    
         
             
                      - type: string
         
     | 
| 
       1332 
     | 
    
         
            -
                     
     | 
| 
      
 1324 
     | 
    
         
            +
                    interpolation_content:
         
     | 
| 
       1333 
1325 
     | 
    
         
             
                      - config
         
     | 
| 
       1334 
1326 
     | 
    
         
             
                    examples:
         
     | 
| 
       1335 
1327 
     | 
    
         
             
                      - ["data"]
         
     | 
| 
         @@ -2903,96 +2895,6 @@ definitions: 
     | 
|
| 
       2903 
2895 
     | 
    
         
             
                  $parameters:
         
     | 
| 
       2904 
2896 
     | 
    
         
             
                    type: object
         
     | 
| 
       2905 
2897 
     | 
    
         
             
                    additionalProperties: true
         
     | 
| 
       2906 
     | 
    
         
            -
              ComponentMappingDefinition:
         
     | 
| 
       2907 
     | 
    
         
            -
                title: Component Mapping Definition
         
     | 
| 
       2908 
     | 
    
         
            -
                description: (This component is experimental. Use at your own risk.) Specifies a mapping definition to update or add fields in a record or configuration. This allows dynamic mapping of data by interpolating values into the template based on provided contexts.
         
     | 
| 
       2909 
     | 
    
         
            -
                type: object
         
     | 
| 
       2910 
     | 
    
         
            -
                required:
         
     | 
| 
       2911 
     | 
    
         
            -
                  - type
         
     | 
| 
       2912 
     | 
    
         
            -
                  - field_path
         
     | 
| 
       2913 
     | 
    
         
            -
                  - value
         
     | 
| 
       2914 
     | 
    
         
            -
                properties:
         
     | 
| 
       2915 
     | 
    
         
            -
                  type:
         
     | 
| 
       2916 
     | 
    
         
            -
                    type: string
         
     | 
| 
       2917 
     | 
    
         
            -
                    enum: [ComponentMappingDefinition]
         
     | 
| 
       2918 
     | 
    
         
            -
                  field_path:
         
     | 
| 
       2919 
     | 
    
         
            -
                    title: Field Path
         
     | 
| 
       2920 
     | 
    
         
            -
                    description: A list of potentially nested fields indicating the full path where value will be added or updated.
         
     | 
| 
       2921 
     | 
    
         
            -
                    type: array
         
     | 
| 
       2922 
     | 
    
         
            -
                    items:
         
     | 
| 
       2923 
     | 
    
         
            -
                      - type: string
         
     | 
| 
       2924 
     | 
    
         
            -
                    interpolation_context:
         
     | 
| 
       2925 
     | 
    
         
            -
                      - config
         
     | 
| 
       2926 
     | 
    
         
            -
                      - components_values
         
     | 
| 
       2927 
     | 
    
         
            -
                      - stream_template_config
         
     | 
| 
       2928 
     | 
    
         
            -
                    examples:
         
     | 
| 
       2929 
     | 
    
         
            -
                      - ["data"]
         
     | 
| 
       2930 
     | 
    
         
            -
                      - ["data", "records"]
         
     | 
| 
       2931 
     | 
    
         
            -
                      - ["data", "{{ parameters.name }}"]
         
     | 
| 
       2932 
     | 
    
         
            -
                      - ["data", "*", "record"]
         
     | 
| 
       2933 
     | 
    
         
            -
                  value:
         
     | 
| 
       2934 
     | 
    
         
            -
                    title: Value
         
     | 
| 
       2935 
     | 
    
         
            -
                    description: The dynamic or static value to assign to the key. Interpolated values can be used to dynamically determine the value during runtime.
         
     | 
| 
       2936 
     | 
    
         
            -
                    type: string
         
     | 
| 
       2937 
     | 
    
         
            -
                    interpolation_context:
         
     | 
| 
       2938 
     | 
    
         
            -
                      - config
         
     | 
| 
       2939 
     | 
    
         
            -
                      - stream_template_config
         
     | 
| 
       2940 
     | 
    
         
            -
                      - components_values
         
     | 
| 
       2941 
     | 
    
         
            -
                    examples:
         
     | 
| 
       2942 
     | 
    
         
            -
                      - "{{ components_values['updates'] }}"
         
     | 
| 
       2943 
     | 
    
         
            -
                      - "{{ components_values['MetaData']['LastUpdatedTime'] }}"
         
     | 
| 
       2944 
     | 
    
         
            -
                      - "{{ config['segment_id'] }}"
         
     | 
| 
       2945 
     | 
    
         
            -
                  value_type:
         
     | 
| 
       2946 
     | 
    
         
            -
                    title: Value Type
         
     | 
| 
       2947 
     | 
    
         
            -
                    description: The expected data type of the value. If omitted, the type will be inferred from the value provided.
         
     | 
| 
       2948 
     | 
    
         
            -
                    "$ref": "#/definitions/ValueType"
         
     | 
| 
       2949 
     | 
    
         
            -
                  $parameters:
         
     | 
| 
       2950 
     | 
    
         
            -
                    type: object
         
     | 
| 
       2951 
     | 
    
         
            -
                    additionalProperties: true
         
     | 
| 
       2952 
     | 
    
         
            -
              HttpComponentsResolver:
         
     | 
| 
       2953 
     | 
    
         
            -
                type: object
         
     | 
| 
       2954 
     | 
    
         
            -
                description: (This component is experimental. Use at your own risk.) Component resolve and populates stream templates with components fetched via an HTTP retriever.
         
     | 
| 
       2955 
     | 
    
         
            -
                properties:
         
     | 
| 
       2956 
     | 
    
         
            -
                  type:
         
     | 
| 
       2957 
     | 
    
         
            -
                    type: string
         
     | 
| 
       2958 
     | 
    
         
            -
                    enum: [HttpComponentsResolver]
         
     | 
| 
       2959 
     | 
    
         
            -
                  retriever:
         
     | 
| 
       2960 
     | 
    
         
            -
                    title: Retriever
         
     | 
| 
       2961 
     | 
    
         
            -
                    description: Component used to coordinate how records are extracted across stream slices and request pages.
         
     | 
| 
       2962 
     | 
    
         
            -
                    anyOf:
         
     | 
| 
       2963 
     | 
    
         
            -
                      - "$ref": "#/definitions/AsyncRetriever"
         
     | 
| 
       2964 
     | 
    
         
            -
                      - "$ref": "#/definitions/CustomRetriever"
         
     | 
| 
       2965 
     | 
    
         
            -
                      - "$ref": "#/definitions/SimpleRetriever"
         
     | 
| 
       2966 
     | 
    
         
            -
                  components_mapping:
         
     | 
| 
       2967 
     | 
    
         
            -
                    type: array
         
     | 
| 
       2968 
     | 
    
         
            -
                    items:
         
     | 
| 
       2969 
     | 
    
         
            -
                      "$ref": "#/definitions/ComponentMappingDefinition"
         
     | 
| 
       2970 
     | 
    
         
            -
                  $parameters:
         
     | 
| 
       2971 
     | 
    
         
            -
                    type: object
         
     | 
| 
       2972 
     | 
    
         
            -
                    additionalProperties: true
         
     | 
| 
       2973 
     | 
    
         
            -
                required:
         
     | 
| 
       2974 
     | 
    
         
            -
                  - type
         
     | 
| 
       2975 
     | 
    
         
            -
                  - retriever
         
     | 
| 
       2976 
     | 
    
         
            -
                  - components_mapping
         
     | 
| 
       2977 
     | 
    
         
            -
              DynamicDeclarativeStream:
         
     | 
| 
       2978 
     | 
    
         
            -
                type: object
         
     | 
| 
       2979 
     | 
    
         
            -
                description: (This component is experimental. Use at your own risk.) A component that described how will be created declarative streams based on stream template.
         
     | 
| 
       2980 
     | 
    
         
            -
                properties:
         
     | 
| 
       2981 
     | 
    
         
            -
                  type:
         
     | 
| 
       2982 
     | 
    
         
            -
                    type: string
         
     | 
| 
       2983 
     | 
    
         
            -
                    enum: [DynamicDeclarativeStream]
         
     | 
| 
       2984 
     | 
    
         
            -
                  stream_template:
         
     | 
| 
       2985 
     | 
    
         
            -
                    title: Stream Template
         
     | 
| 
       2986 
     | 
    
         
            -
                    description: Reference to the stream template.
         
     | 
| 
       2987 
     | 
    
         
            -
                    "$ref": "#/definitions/DeclarativeStream"
         
     | 
| 
       2988 
     | 
    
         
            -
                  components_resolver:
         
     | 
| 
       2989 
     | 
    
         
            -
                    title: Components Resolver
         
     | 
| 
       2990 
     | 
    
         
            -
                    description: Component resolve and populates stream templates with components values.
         
     | 
| 
       2991 
     | 
    
         
            -
                    "$ref": "#/definitions/HttpComponentsResolver"
         
     | 
| 
       2992 
     | 
    
         
            -
                required:
         
     | 
| 
       2993 
     | 
    
         
            -
                  - type
         
     | 
| 
       2994 
     | 
    
         
            -
                  - stream_template
         
     | 
| 
       2995 
     | 
    
         
            -
                  - components_resolver
         
     | 
| 
       2996 
2898 
     | 
    
         
             
            interpolation:
         
     | 
| 
       2997 
2899 
     | 
    
         
             
              variables:
         
     | 
| 
       2998 
2900 
     | 
    
         
             
                - title: config
         
     | 
| 
         @@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter): 
     | 
|
| 
       59 
59 
     | 
    
         | 
| 
       60 
60 
     | 
    
         
             
                def __init__(
         
     | 
| 
       61 
61 
     | 
    
         
             
                    self,
         
     | 
| 
       62 
     | 
    
         
            -
                     
     | 
| 
       63 
     | 
    
         
            -
                    substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
         
     | 
| 
      
 62 
     | 
    
         
            +
                    cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
         
     | 
| 
       64 
63 
     | 
    
         
             
                    **kwargs: Any,
         
     | 
| 
       65 
64 
     | 
    
         
             
                ):
         
     | 
| 
       66 
65 
     | 
    
         
             
                    super().__init__(**kwargs)
         
     | 
| 
       67 
     | 
    
         
            -
                    self. 
     | 
| 
       68 
     | 
    
         
            -
                    self._substream_cursor = substream_cursor
         
     | 
| 
      
 66 
     | 
    
         
            +
                    self._cursor = cursor
         
     | 
| 
       69 
67 
     | 
    
         | 
| 
       70 
68 
     | 
    
         
             
                def filter_records(
         
     | 
| 
       71 
69 
     | 
    
         
             
                    self,
         
     | 
| 
         @@ -77,7 +75,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter): 
     | 
|
| 
       77 
75 
     | 
    
         
             
                    records = (
         
     | 
| 
       78 
76 
     | 
    
         
             
                        record
         
     | 
| 
       79 
77 
     | 
    
         
             
                        for record in records
         
     | 
| 
       80 
     | 
    
         
            -
                        if  
     | 
| 
      
 78 
     | 
    
         
            +
                        if self._cursor.should_be_synced(
         
     | 
| 
       81 
79 
     | 
    
         
             
                            # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
         
     | 
| 
       82 
80 
     | 
    
         
             
                            # Record stream name is empty because it is not used during the filtering
         
     | 
| 
       83 
81 
     | 
    
         
             
                            Record(data=record, associated_slice=stream_slice, stream_name="")
         
     | 
| 
         @@ -2,6 +2,7 @@ 
     | 
|
| 
       2 
2 
     | 
    
         
             
            # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
         
     | 
| 
       3 
3 
     | 
    
         
             
            #
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
      
 5 
     | 
    
         
            +
            from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import ConcurrentCursorFactory, ConcurrentPerPartitionCursor
         
     | 
| 
       5 
6 
     | 
    
         
             
            from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
         
     | 
| 
       6 
7 
     | 
    
         
             
            from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
         
     | 
| 
       7 
8 
     | 
    
         
             
            from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import GlobalSubstreamCursor
         
     | 
| 
         @@ -14,6 +15,8 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i 
     | 
|
| 
       14 
15 
     | 
    
         | 
| 
       15 
16 
     | 
    
         
             
            __all__ = [
         
     | 
| 
       16 
17 
     | 
    
         
             
                "CursorFactory",
         
     | 
| 
      
 18 
     | 
    
         
            +
                "ConcurrentCursorFactory",
         
     | 
| 
      
 19 
     | 
    
         
            +
                "ConcurrentPerPartitionCursor",
         
     | 
| 
       17 
20 
     | 
    
         
             
                "DatetimeBasedCursor",
         
     | 
| 
       18 
21 
     | 
    
         
             
                "DeclarativeCursor",
         
     | 
| 
       19 
22 
     | 
    
         
             
                "GlobalSubstreamCursor",
         
     | 
| 
         @@ -0,0 +1,270 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            import copy
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            import logging
         
     | 
| 
      
 7 
     | 
    
         
            +
            from collections import OrderedDict
         
     | 
| 
      
 8 
     | 
    
         
            +
            from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
         
     | 
| 
      
 11 
     | 
    
         
            +
            from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
         
     | 
| 
      
 12 
     | 
    
         
            +
            from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
         
     | 
| 
      
 13 
     | 
    
         
            +
            from airbyte_cdk.sources.message import MessageRepository
         
     | 
| 
      
 14 
     | 
    
         
            +
            from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
         
     | 
| 
      
 15 
     | 
    
         
            +
                PerPartitionKeySerializer,
         
     | 
| 
      
 16 
     | 
    
         
            +
            )
         
     | 
| 
      
 17 
     | 
    
         
            +
            from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, CursorField
         
     | 
| 
      
 18 
     | 
    
         
            +
            from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
         
     | 
| 
      
 19 
     | 
    
         
            +
            from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            logger = logging.getLogger("airbyte")
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            class ConcurrentCursorFactory:
         
     | 
| 
      
 25 
     | 
    
         
            +
                def __init__(self, create_function: Callable[..., Cursor]):
         
     | 
| 
      
 26 
     | 
    
         
            +
                    self._create_function = create_function
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                def create(self, stream_state: Mapping[str, Any]) -> Cursor:
         
     | 
| 
      
 29 
     | 
    
         
            +
                    return self._create_function(stream_state=stream_state)
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            class ConcurrentPerPartitionCursor(Cursor):
         
     | 
| 
      
 33 
     | 
    
         
            +
                """
         
     | 
| 
      
 34 
     | 
    
         
            +
                Manages state per partition when a stream has many partitions, to prevent data loss or duplication.
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                **Partition Limitation and Limit Reached Logic**
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                - **DEFAULT_MAX_PARTITIONS_NUMBER**: The maximum number of partitions to keep in memory (default is 10,000).
         
     | 
| 
      
 39 
     | 
    
         
            +
                - **_cursor_per_partition**: An ordered dictionary that stores cursors for each partition.
         
     | 
| 
      
 40 
     | 
    
         
            +
                - **_over_limit**: A counter that increments each time the oldest partition is removed when the limit is exceeded.
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                The class ensures that the number of partitions tracked does not exceed the `DEFAULT_MAX_PARTITIONS_NUMBER` to prevent excessive memory usage.
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
                - When the number of partitions exceeds the limit, the oldest partitions are removed from `_cursor_per_partition`, and `_over_limit` is incremented accordingly.
         
     | 
| 
      
 45 
     | 
    
         
            +
                - The `limit_reached` method returns `True` when `_over_limit` exceeds `DEFAULT_MAX_PARTITIONS_NUMBER`, indicating that the global cursor should be used instead of per-partition cursors.
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
                This approach avoids unnecessary switching to a global cursor due to temporary spikes in partition counts, ensuring that switching is only done when a sustained high number of partitions is observed.
         
     | 
| 
      
 48 
     | 
    
         
            +
                """
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                DEFAULT_MAX_PARTITIONS_NUMBER = 10000
         
     | 
| 
      
 51 
     | 
    
         
            +
                _NO_STATE: Mapping[str, Any] = {}
         
     | 
| 
      
 52 
     | 
    
         
            +
                _NO_CURSOR_STATE: Mapping[str, Any] = {}
         
     | 
| 
      
 53 
     | 
    
         
            +
                _KEY = 0
         
     | 
| 
      
 54 
     | 
    
         
            +
                _VALUE = 1
         
     | 
| 
      
 55 
     | 
    
         
            +
                _state_to_migrate_from: Mapping[str, Any] = {}
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
                def __init__(
         
     | 
| 
      
 58 
     | 
    
         
            +
                    self,
         
     | 
| 
      
 59 
     | 
    
         
            +
                    cursor_factory: ConcurrentCursorFactory,
         
     | 
| 
      
 60 
     | 
    
         
            +
                    partition_router: PartitionRouter,
         
     | 
| 
      
 61 
     | 
    
         
            +
                    stream_name: str,
         
     | 
| 
      
 62 
     | 
    
         
            +
                    stream_namespace: Optional[str],
         
     | 
| 
      
 63 
     | 
    
         
            +
                    stream_state: Any,
         
     | 
| 
      
 64 
     | 
    
         
            +
                    message_repository: MessageRepository,
         
     | 
| 
      
 65 
     | 
    
         
            +
                    connector_state_manager: ConnectorStateManager,
         
     | 
| 
      
 66 
     | 
    
         
            +
                    cursor_field: CursorField,
         
     | 
| 
      
 67 
     | 
    
         
            +
                ) -> None:
         
     | 
| 
      
 68 
     | 
    
         
            +
                    self._stream_name = stream_name
         
     | 
| 
      
 69 
     | 
    
         
            +
                    self._stream_namespace = stream_namespace
         
     | 
| 
      
 70 
     | 
    
         
            +
                    self._message_repository = message_repository
         
     | 
| 
      
 71 
     | 
    
         
            +
                    self._connector_state_manager = connector_state_manager
         
     | 
| 
      
 72 
     | 
    
         
            +
                    self._cursor_field = cursor_field
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
                    self._cursor_factory = cursor_factory
         
     | 
| 
      
 75 
     | 
    
         
            +
                    self._partition_router = partition_router
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
      
 77 
     | 
    
         
            +
                    # The dict is ordered to ensure that once the maximum number of partitions is reached,
         
     | 
| 
      
 78 
     | 
    
         
            +
                    # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
         
     | 
| 
      
 79 
     | 
    
         
            +
                    self._cursor_per_partition: OrderedDict[str, Cursor] = OrderedDict()
         
     | 
| 
      
 80 
     | 
    
         
            +
                    self._over_limit = 0
         
     | 
| 
      
 81 
     | 
    
         
            +
                    self._partition_serializer = PerPartitionKeySerializer()
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
                    self._set_initial_state(stream_state)
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                @property
         
     | 
| 
      
 86 
     | 
    
         
            +
                def cursor_field(self) -> CursorField:
         
     | 
| 
      
 87 
     | 
    
         
            +
                    return self._cursor_field
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
                @property
         
     | 
| 
      
 90 
     | 
    
         
            +
                def state(self) -> MutableMapping[str, Any]:
         
     | 
| 
      
 91 
     | 
    
         
            +
                    states = []
         
     | 
| 
      
 92 
     | 
    
         
            +
                    for partition_tuple, cursor in self._cursor_per_partition.items():
         
     | 
| 
      
 93 
     | 
    
         
            +
                        cursor_state = cursor._connector_state_converter.convert_to_state_message(
         
     | 
| 
      
 94 
     | 
    
         
            +
                            cursor._cursor_field, cursor.state
         
     | 
| 
      
 95 
     | 
    
         
            +
                        )
         
     | 
| 
      
 96 
     | 
    
         
            +
                        if cursor_state:
         
     | 
| 
      
 97 
     | 
    
         
            +
                            states.append(
         
     | 
| 
      
 98 
     | 
    
         
            +
                                {
         
     | 
| 
      
 99 
     | 
    
         
            +
                                    "partition": self._to_dict(partition_tuple),
         
     | 
| 
      
 100 
     | 
    
         
            +
                                    "cursor": copy.deepcopy(cursor_state),
         
     | 
| 
      
 101 
     | 
    
         
            +
                                }
         
     | 
| 
      
 102 
     | 
    
         
            +
                            )
         
     | 
| 
      
 103 
     | 
    
         
            +
                    state: dict[str, Any] = {"states": states}
         
     | 
| 
      
 104 
     | 
    
         
            +
                    return state
         
     | 
| 
      
 105 
     | 
    
         
            +
             
     | 
| 
      
 106 
     | 
    
         
            +
                def close_partition(self, partition: Partition) -> None:
         
     | 
| 
      
 107 
     | 
    
         
            +
                    self._cursor_per_partition[self._to_partition_key(partition._stream_slice.partition)].close_partition_without_emit(partition=partition)
         
     | 
| 
      
 108 
     | 
    
         
            +
             
     | 
| 
      
 109 
     | 
    
         
            +
                def ensure_at_least_one_state_emitted(self) -> None:
         
     | 
| 
      
 110 
     | 
    
         
            +
                    """
         
     | 
| 
      
 111 
     | 
    
         
            +
                    The platform expects to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
         
     | 
| 
      
 112 
     | 
    
         
            +
                    called.
         
     | 
| 
      
 113 
     | 
    
         
            +
                    """
         
     | 
| 
      
 114 
     | 
    
         
            +
                    self._emit_state_message()
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
                def _emit_state_message(self) -> None:
         
     | 
| 
      
 117 
     | 
    
         
            +
                    self._connector_state_manager.update_state_for_stream(
         
     | 
| 
      
 118 
     | 
    
         
            +
                        self._stream_name,
         
     | 
| 
      
 119 
     | 
    
         
            +
                        self._stream_namespace,
         
     | 
| 
      
 120 
     | 
    
         
            +
                        self.state,
         
     | 
| 
      
 121 
     | 
    
         
            +
                    )
         
     | 
| 
      
 122 
     | 
    
         
            +
                    state_message = self._connector_state_manager.create_state_message(
         
     | 
| 
      
 123 
     | 
    
         
            +
                        self._stream_name, self._stream_namespace
         
     | 
| 
      
 124 
     | 
    
         
            +
                    )
         
     | 
| 
      
 125 
     | 
    
         
            +
                    self._message_repository.emit_message(state_message)
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
      
 128 
     | 
    
         
            +
                def stream_slices(self) -> Iterable[StreamSlice]:
         
     | 
| 
      
 129 
     | 
    
         
            +
                    slices = self._partition_router.stream_slices()
         
     | 
| 
      
 130 
     | 
    
         
            +
                    for partition in slices:
         
     | 
| 
      
 131 
     | 
    
         
            +
                        yield from self.generate_slices_from_partition(partition)
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
                def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
                    """Yield one ``StreamSlice`` per cursor slice of the given partition.

                    The partition's cursor is looked up in ``self._cursor_per_partition``. When no cursor is
                    registered for it, a new one is created and stored, seeded with
                    ``self._state_to_migrate_from`` when that is truthy and ``self._NO_CURSOR_STATE`` otherwise.
                    """
                    # Enforce the partition cap before looking up (and possibly registering) a cursor.
                    self._ensure_partition_limit()

                    partition_cursor = self._cursor_per_partition.get(
                        self._to_partition_key(partition.partition)
                    )
                    if not partition_cursor:
                        initial_state = self._state_to_migrate_from or self._NO_CURSOR_STATE
                        partition_cursor = self._create_cursor(initial_state)
                        self._cursor_per_partition[self._to_partition_key(partition.partition)] = (
                            partition_cursor
                        )

                    for window in partition_cursor.stream_slices():
                        yield StreamSlice(
                            partition=partition,
                            cursor_slice=window,
                            extra_fields=partition.extra_fields,
                        )
         
     | 
| 
      
 151 
     | 
    
         
            +
             
     | 
| 
      
 152 
     | 
    
         
            +
                def _ensure_partition_limit(self) -> None:
                    """
                    Keep the number of tracked partition cursors below ``DEFAULT_MAX_PARTITIONS_NUMBER``.

                    While the limit is met or exceeded, the first entry of ``self._cursor_per_partition``
                    (the oldest one added) is removed, ``self._over_limit`` is incremented and a warning is logged.
                    """
                    highest_allowed_count = self.DEFAULT_MAX_PARTITIONS_NUMBER - 1
                    while len(self._cursor_per_partition) > highest_allowed_count:
                        self._over_limit += 1
                        # popitem(last=False) removes the entry at the front of the mapping, i.e. the oldest.
                        oldest_partition, _ = self._cursor_per_partition.popitem(last=False)
                        logger.warning(
                            f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
                        )
         
     | 
| 
      
 164 
     | 
    
         
            +
             
     | 
| 
      
 165 
     | 
    
         
            +
                def limit_reached(self) -> bool:
                    """Return True once ``_over_limit`` is strictly greater than ``DEFAULT_MAX_PARTITIONS_NUMBER``."""
                    dropped_count = self._over_limit
                    return dropped_count > self.DEFAULT_MAX_PARTITIONS_NUMBER
         
     | 
| 
      
 167 
     | 
    
         
            +
             
     | 
| 
      
 168 
     | 
    
         
            +
                def _set_initial_state(self, stream_state: StreamState) -> None:
         
     | 
| 
      
 169 
     | 
    
         
            +
                    """
         
     | 
| 
      
 170 
     | 
    
         
            +
                    Set the initial state for the cursors.
         
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
      
 172 
     | 
    
         
            +
                    This method initializes the state for each partition cursor using the provided stream state.
         
     | 
| 
      
 173 
     | 
    
         
            +
                    If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
         
     | 
| 
      
 174 
     | 
    
         
            +
             
     | 
| 
      
 175 
     | 
    
         
            +
                    Additionally, it sets the parent state for partition routers that are based on parent streams. If a partition router
         
     | 
| 
      
 176 
     | 
    
         
            +
                    does not have parent streams, this step will be skipped due to the default PartitionRouter implementation.
         
     | 
| 
      
 177 
     | 
    
         
            +
             
     | 
| 
      
 178 
     | 
    
         
            +
                    Args:
         
     | 
| 
      
 179 
     | 
    
         
            +
                        stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
         
     | 
| 
      
 180 
     | 
    
         
            +
                            {
         
     | 
| 
      
 181 
     | 
    
         
            +
                                "states": [
         
     | 
| 
      
 182 
     | 
    
         
            +
                                    {
         
     | 
| 
      
 183 
     | 
    
         
            +
                                        "partition": {
         
     | 
| 
      
 184 
     | 
    
         
            +
                                            "partition_key": "value"
         
     | 
| 
      
 185 
     | 
    
         
            +
                                        },
         
     | 
| 
      
 186 
     | 
    
         
            +
                                        "cursor": {
         
     | 
| 
      
 187 
     | 
    
         
            +
                                            "last_updated": "2023-05-27T00:00:00Z"
         
     | 
| 
      
 188 
     | 
    
         
            +
                                        }
         
     | 
| 
      
 189 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 190 
     | 
    
         
            +
                                ],
         
     | 
| 
      
 191 
     | 
    
         
            +
                                "parent_state": {
         
     | 
| 
      
 192 
     | 
    
         
            +
                                    "parent_stream_name": {
         
     | 
| 
      
 193 
     | 
    
         
            +
                                        "last_updated": "2023-05-27T00:00:00Z"
         
     | 
| 
      
 194 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 195 
     | 
    
         
            +
                                }
         
     | 
| 
      
 196 
     | 
    
         
            +
                            }
         
     | 
| 
      
 197 
     | 
    
         
            +
                    """
         
     | 
| 
      
 198 
     | 
    
         
            +
                    if not stream_state:
         
     | 
| 
      
 199 
     | 
    
         
            +
                        return
         
     | 
| 
      
 200 
     | 
    
         
            +
             
     | 
| 
      
 201 
     | 
    
         
            +
                    if "states" not in stream_state:
         
     | 
| 
      
 202 
     | 
    
         
            +
                        # We assume that `stream_state` is in a global format that can be applied to all partitions.
         
     | 
| 
      
 203 
     | 
    
         
            +
                        # Example: {"global_state_format_key": "global_state_format_value"}
         
     | 
| 
      
 204 
     | 
    
         
            +
                        self._state_to_migrate_from = stream_state
         
     | 
| 
      
 205 
     | 
    
         
            +
             
     | 
| 
      
 206 
     | 
    
         
            +
                    else:
         
     | 
| 
      
 207 
     | 
    
         
            +
                        for state in stream_state["states"]:
         
     | 
| 
      
 208 
     | 
    
         
            +
                            self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
         
     | 
| 
      
 209 
     | 
    
         
            +
                                self._create_cursor(state["cursor"])
         
     | 
| 
      
 210 
     | 
    
         
            +
                            )
         
     | 
| 
      
 211 
     | 
    
         
            +
             
     | 
| 
      
 212 
     | 
    
         
            +
                        # set default state for missing partitions if it is per partition with fallback to global
         
     | 
| 
      
 213 
     | 
    
         
            +
                        if "state" in stream_state:
         
     | 
| 
      
 214 
     | 
    
         
            +
                            self._state_to_migrate_from = stream_state["state"]
         
     | 
| 
      
 215 
     | 
    
         
            +
             
     | 
| 
      
 216 
     | 
    
         
            +
                    # Set parent state for partition routers based on parent streams
         
     | 
| 
      
 217 
     | 
    
         
            +
                    self._partition_router.set_initial_state(stream_state)
         
     | 
| 
      
 218 
     | 
    
         
            +
             
     | 
| 
      
 219 
     | 
    
         
            +
                def observe(self, record: Record) -> None:
                    """Forward a record to the cursor registered for the partition it was read from.

                    Args:
                        record: The record to observe; its ``associated_slice.partition`` selects the cursor.

                    Raises:
                        ValueError: If the record has no associated slice, so no partition cursor can be
                            selected. This mirrors the error raised by ``_get_cursor`` for the same situation.
                    """
                    # Fail with an explicit error instead of an AttributeError on `None.partition`.
                    if not record.associated_slice:
                        raise ValueError(
                            "Invalid state as stream slices that are emitted should refer to an existing cursor"
                        )
                    self._cursor_per_partition[
                        self._to_partition_key(record.associated_slice.partition)
                    ].observe(record)
         
     | 
| 
      
 221 
     | 
    
         
            +
             
     | 
| 
      
 222 
     | 
    
         
            +
                def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
         
     | 
| 
      
 223 
     | 
    
         
            +
                    return self._partition_serializer.to_partition_key(partition)
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
                def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
         
     | 
| 
      
 226 
     | 
    
         
            +
                    return self._partition_serializer.to_partition(partition_key)
         
     | 
| 
      
 227 
     | 
    
         
            +
             
     | 
| 
      
 228 
     | 
    
         
            +
                def _create_cursor(self, cursor_state: Any) -> DeclarativeCursor:
         
     | 
| 
      
 229 
     | 
    
         
            +
                    cursor = self._cursor_factory.create(stream_state=cursor_state)
         
     | 
| 
      
 230 
     | 
    
         
            +
                    return cursor
         
     | 
| 
      
 231 
     | 
    
         
            +
             
     | 
| 
      
 232 
     | 
    
         
            +
                def should_be_synced(self, record: Record) -> bool:
                    """Ask the cursor of the record's partition whether the record should be synced."""
                    partition_cursor = self._get_cursor(record)
                    return partition_cursor.should_be_synced(record)
         
     | 
| 
      
 234 
     | 
    
         
            +
             
     | 
| 
      
 235 
     | 
    
         
            +
                def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
                    """Compare two records of the same partition using that partition's cursor.

                    Both records are reduced with ``_convert_record_to_cursor_record`` before being handed to
                    the cursor returned by ``_get_cursor(first)``.

                    Raises:
                        ValueError: If either record has no associated slice, or if the two records belong to
                            different partitions.
                    """
                    if not (first.associated_slice and second.associated_slice):
                        raise ValueError(
                            f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
                        )

                    partitions_differ = first.associated_slice.partition != second.associated_slice.partition
                    if partitions_differ:
                        raise ValueError(
                            f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
                        )

                    compare = self._get_cursor(first).is_greater_than_or_equal
                    left = self._convert_record_to_cursor_record(first)
                    right = self._convert_record_to_cursor_record(second)
                    return compare(left, right)
         
     | 
| 
      
 249 
     | 
    
         
            +
             
     | 
| 
      
 250 
     | 
    
         
            +
                @staticmethod
                def _convert_record_to_cursor_record(record: Record) -> Record:
                    """Return a copy of the record whose slice keeps only the cursor slice.

                    The new record carries the same data. Its slice has an empty partition and the original
                    ``cursor_slice``, or is ``None`` when the record has no associated slice.
                    """
                    payload = record.data
                    if record.associated_slice:
                        cursor_only_slice = StreamSlice(
                            partition={}, cursor_slice=record.associated_slice.cursor_slice
                        )
                    else:
                        cursor_only_slice = None
                    return Record(payload, cursor_only_slice)
         
     | 
| 
      
 258 
     | 
    
         
            +
             
     | 
| 
      
 259 
     | 
    
         
            +
                def _get_cursor(self, record: Record) -> Cursor:
         
     | 
| 
      
 260 
     | 
    
         
            +
                    if not record.associated_slice:
         
     | 
| 
      
 261 
     | 
    
         
            +
                        raise ValueError(
         
     | 
| 
      
 262 
     | 
    
         
            +
                            "Invalid state as stream slices that are emitted should refer to an existing cursor"
         
     | 
| 
      
 263 
     | 
    
         
            +
                        )
         
     | 
| 
      
 264 
     | 
    
         
            +
                    partition_key = self._to_partition_key(record.associated_slice.partition)
         
     | 
| 
      
 265 
     | 
    
         
            +
                    if partition_key not in self._cursor_per_partition:
         
     | 
| 
      
 266 
     | 
    
         
            +
                        raise ValueError(
         
     | 
| 
      
 267 
     | 
    
         
            +
                            "Invalid state as stream slices that are emitted should refer to an existing cursor"
         
     | 
| 
      
 268 
     | 
    
         
            +
                        )
         
     | 
| 
      
 269 
     | 
    
         
            +
                    cursor = self._cursor_per_partition[partition_key]
         
     | 
| 
      
 270 
     | 
    
         
            +
                    return cursor
         
     | 
| 
         @@ -303,6 +303,15 @@ class PerPartitionCursor(DeclarativeCursor): 
     | 
|
| 
       303 
303 
     | 
    
         
             
                        raise ValueError("A partition needs to be provided in order to get request body json")
         
     | 
| 
       304 
304 
     | 
    
         | 
| 
       305 
305 
     | 
    
         
             
                def should_be_synced(self, record: Record) -> bool:
                    """Decide whether a record should be synced, using the cursor of its partition.

                    When the record's partition has no cursor in ``self._cursor_per_partition`` yet, one is
                    created first, seeded with ``self._state_to_migrate_from`` when that is truthy and with
                    ``self._NO_CURSOR_STATE`` otherwise. The record is then reduced with
                    ``_convert_record_to_cursor_record`` and passed to that cursor's ``should_be_synced``.

                    Args:
                        record: The record to evaluate.

                    Returns:
                        Whatever the partition cursor's ``should_be_synced`` returns for the converted record.
                    """
                    record_slice = record.associated_slice
                    # Only create a cursor lazily when the record carries a slice. A record without one is
                    # passed on to `_get_cursor` rather than failing here on `None.partition`.
                    # NOTE(review): `_get_cursor` of this class is not visible in this hunk; presumably it
                    # rejects slice-less records with a ValueError - confirm.
                    if record_slice:
                        # Serialize the partition once and reuse the key for both the check and the insert.
                        partition_key = self._to_partition_key(record_slice.partition)
                        if partition_key not in self._cursor_per_partition:
                            partition_state = (
                                self._state_to_migrate_from
                                if self._state_to_migrate_from
                                else self._NO_CURSOR_STATE
                            )
                            self._cursor_per_partition[partition_key] = self._create_cursor(partition_state)
                    return self._get_cursor(record).should_be_synced(
                        self._convert_record_to_cursor_record(record)
                    )
         
     |