PyPI - airbyte-cdk - Versions diffs - 0.70.2__py3-none-any.whl → 0.72.0__py3-none-any.whl - Mend

airbyte-cdk 0.70.2 (py3-none-any.whl) → 0.72.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED Viewed

@@ -602,6 +602,27 @@ definitions:
       $parameters:
         type: object
         additionalProperties: true
+  CustomSchemaLoader:
+    title: Custom Schema Loader
+    description: Schema Loader component whose behavior is derived from a custom code implementation of the connector.
+    type: object
+    additionalProperties: true
+    required:
+      - type
+      - class_name
+    properties:
+      type:
+        type: string
+        enum: [CustomSchemaLoader]
+      class_name:
+        title: Class Name
+        description: Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_<name>.<package>.<class_name>`.
+        type: string
+        examples:
+          - "source_railz.components.MyCustomSchemaLoader"
+      $parameters:
+        type: object
+        additionalProperties: true
   CustomTransformation:
     title: Custom Transformation
     description: Transformation component whose behavior is derived from a custom code implementation of the connector.
@@ -948,6 +969,7 @@ definitions:
         anyOf:
           - "$ref": "#/definitions/InlineSchemaLoader"
           - "$ref": "#/definitions/JsonFileSchemaLoader"
+          - "$ref": "#/definitions/CustomSchemaLoader"
       # TODO we have move the transformation to the RecordSelector level in the code but kept this here for
       # compatibility reason. We should eventually move this to align with the code.
       transformations:

airbyte_cdk/sources/declarative/incremental/cursor.py CHANGED Viewed

@@ -24,18 +24,28 @@ class Cursor(ABC, StreamSlicer):
         :param stream_state: The state of the stream as returned by get_stream_state
         """
+    def observe(self, stream_slice: StreamSlice, record: Record) -> None:
+        """
+        Register a record with the cursor; the cursor instance can then use it to manage the state of the in-progress stream read.
+        :param stream_slice: The current slice, which may or may not contain the most recently observed record
+        :param record: the most recently-read record, which the cursor can use to update the stream state. Outwardly-visible changes to the
+          stream state may need to be deferred depending on whether the source reliably orders records by the cursor field.
+        """
+        pass
     @abstractmethod
     def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
         """
         Update state based on the stream slice and the latest record. Note that `stream_slice.cursor_slice` and
-        `last_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be leveraged to
+        `most_recent_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be leveraged to
         update the state.
         :param stream_slice: slice to close
-        :param last_record: the latest record we have received for the slice. This is important to consider because even if the cursor emits
-          a slice, some APIs are not able to enforce the upper boundary. The outcome is that the last_record might have a higher cursor
-          value than the slice upper boundary and if we want to reduce the duplication as much as possible, we need to consider the highest
-          value between the internal cursor, the stream slice upper boundary and the record cursor value.
+        :param most_recent_record: the latest record we have received for the slice. This is important to consider because even if the
+          cursor emits a slice, some APIs are not able to enforce the upper boundary. The outcome is that the last_record might have a
+          higher cursor value than the slice upper boundary and if we want to reduce the duplication as much as possible, we need to
+          consider the highest value between the internal cursor, the stream slice upper boundary and the record cursor value.
         """
     @abstractmethod

airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py CHANGED Viewed

@@ -52,7 +52,12 @@ class DatetimeBasedCursor(Cursor):
     datetime_format: str
     config: Config
     parameters: InitVar[Mapping[str, Any]]
-    _cursor: Optional[str] = field(repr=False, default=None)  # tracks current datetime
+    _highest_observed_cursor_field_value: Optional[str] = field(
+        repr=False, default=None
+    )  # tracks the latest observed datetime, which may not be safe to emit in the case of out-of-order records
+    _cursor: Optional[str] = field(
+        repr=False, default=None
+    )  # tracks the latest observed datetime that is appropriate to emit as stream state
     end_datetime: Optional[Union[MinMaxDatetime, str]] = None
     step: Optional[Union[InterpolatedString, str]] = None
     cursor_granularity: Optional[str] = None
@@ -109,20 +114,39 @@ class DatetimeBasedCursor(Cursor):
         """
         self._cursor = stream_state.get(self._cursor_field.eval(self.config)) if stream_state else None
-    def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
+    def observe(self, stream_slice: StreamSlice, record: Record) -> None:
+        """
+        Register a record with the cursor; the cursor instance can then use it to manage the state of the in-progress stream read.
+        :param stream_slice: The current slice, which may or may not contain the most recently observed record
+        :param record: the most recently-read record, which the cursor can use to update the stream state. Outwardly-visible changes to the
+          stream state may need to be deferred depending on whether the source reliably orders records by the cursor field.
+        """
+        record_cursor_value = record.get(self._cursor_field.eval(self.config))
+        # if the current record has no cursor value, we cannot meaningfully update the state based on it, so there is nothing more to do
+        if not record_cursor_value:
+            return
+        start_field = self._partition_field_start.eval(self.config)
+        end_field = self._partition_field_end.eval(self.config)
+        is_highest_observed_cursor_value = not self._highest_observed_cursor_field_value or self.parse_date(
+            record_cursor_value
+        ) > self.parse_date(self._highest_observed_cursor_field_value)
+        if (
+            self._is_within_daterange_boundaries(record, stream_slice.get(start_field), stream_slice.get(end_field))  # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date
+            and is_highest_observed_cursor_value
+        ):
+            self._highest_observed_cursor_field_value = record_cursor_value
+    def close_slice(self, stream_slice: StreamSlice, _most_recent_record: Optional[Record]) -> None:
         if stream_slice.partition:
             raise ValueError(f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}.")
-        last_record_cursor_value = most_recent_record.get(self._cursor_field.eval(self.config)) if most_recent_record else None
-        stream_slice_value_end = stream_slice.get(self._partition_field_end.eval(self.config))
-        potential_cursor_values = [
-            cursor_value for cursor_value in [self._cursor, last_record_cursor_value, stream_slice_value_end] if cursor_value
-        ]
         cursor_value_str_by_cursor_value_datetime = dict(
             map(
                 # we need to ensure the cursor value is preserved as is in the state else the CATs might complain of something like
                 # 2023-01-04T17:30:19.000Z' <= '2023-01-04T17:30:19.000000Z'
-                lambda datetime_str: (self.parse_date(datetime_str), datetime_str),
-                potential_cursor_values,
+                lambda datetime_str: (self.parse_date(datetime_str), datetime_str),  # type: ignore # because of the filter on the next line, this will only be called with a str
+                filter(lambda item: item, [self._cursor, self._highest_observed_cursor_field_value]),
             )
         )
         self._cursor = (
@@ -279,10 +303,26 @@ class DatetimeBasedCursor(Cursor):
                 f"Could not find cursor field `{cursor_field}` in record. The incremental sync will assume it needs to be synced",
             )
             return True
         latest_possible_cursor_value = self._select_best_end_datetime()
         earliest_possible_cursor_value = self._calculate_earliest_possible_value(latest_possible_cursor_value)
-        return earliest_possible_cursor_value <= self.parse_date(record_cursor_value) <= latest_possible_cursor_value
+        return self._is_within_daterange_boundaries(record, earliest_possible_cursor_value, latest_possible_cursor_value)
+    def _is_within_daterange_boundaries(
+        self, record: Record, start_datetime_boundary: Union[datetime.datetime, str], end_datetime_boundary: Union[datetime.datetime, str]
+    ) -> bool:
+        cursor_field = self._cursor_field.eval(self.config)
+        record_cursor_value = record.get(cursor_field)
+        if not record_cursor_value:
+            self._send_log(
+                Level.WARN,
+                f"Could not find cursor field `{cursor_field}` in record. The record will not be considered when emitting sync state",
+            )
+            return False
+        if isinstance(start_datetime_boundary, str):
+            start_datetime_boundary = self.parse_date(start_datetime_boundary)
+        if isinstance(end_datetime_boundary, str):
+            end_datetime_boundary = self.parse_date(end_datetime_boundary)
+        return start_datetime_boundary <= self.parse_date(record_cursor_value) <= end_datetime_boundary
     def _send_log(self, level: Level, message: str) -> None:
         if self.message_repository:

airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py CHANGED Viewed

@@ -86,6 +86,11 @@ class PerPartitionCursor(Cursor):
         for state in stream_state["states"]:
             self._cursor_per_partition[self._to_partition_key(state["partition"])] = self._create_cursor(state["cursor"])
+    def observe(self, stream_slice: StreamSlice, record: Record) -> None:
+        self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].observe(
+            StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record
+        )
     def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
         try:
             cursor_most_recent_record = (

airbyte_cdk/sources/declarative/models/declarative_component_schema.py CHANGED Viewed

@@ -208,6 +208,20 @@ class CustomPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters')
+class CustomSchemaLoader(BaseModel):
+    class Config:
+        extra = Extra.allow
+    type: Literal['CustomSchemaLoader']
+    class_name: str = Field(
+        ...,
+        description='Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_<name>.<package>.<class_name>`.',
+        examples=['source_railz.components.MyCustomSchemaLoader'],
+        title='Class Name',
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters')
 class CustomTransformation(BaseModel):
     class Config:
         extra = Extra.allow
@@ -1161,7 +1175,9 @@ class DeclarativeStream(BaseModel):
     primary_key: Optional[PrimaryKey] = Field(
         '', description='The primary key of the stream.', title='Primary Key'
     )
-    schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader]] = Field(
+    schema_loader: Optional[
+        Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]
+    ] = Field(
         None,
         description='Component used to retrieve the schema for the current stream.',
         title='Schema Loader',

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py CHANGED Viewed

@@ -49,6 +49,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRecordFilter as CustomRecordFilterModel
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRequester as CustomRequesterModel
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRetriever as CustomRetrieverModel
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomSchemaLoader as CustomSchemaLoader
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomTransformation as CustomTransformationModel
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import DatetimeBasedCursor as DatetimeBasedCursorModel
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import DeclarativeStream as DeclarativeStreamModel
@@ -165,6 +166,7 @@ class ModelToComponentFactory:
             CustomRecordFilterModel: self.create_custom_component,
             CustomRequesterModel: self.create_custom_component,
             CustomRetrieverModel: self.create_custom_component,
+            CustomSchemaLoader: self.create_custom_component,
             CustomPaginationStrategyModel: self.create_custom_component,
             CustomPartitionRouterModel: self.create_custom_component,
             CustomTransformationModel: self.create_custom_component,

airbyte_cdk/sources/declarative/retrievers/simple_retriever.py CHANGED Viewed

@@ -322,7 +322,13 @@ class SimpleRetriever(Retriever):
             records_schema=records_schema,
         )
         for stream_data in self._read_pages(record_generator, self.state, _slice):
-            most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, stream_data, _slice)
+            current_record = self._extract_record(stream_data, _slice)
+            if self.cursor and current_record:
+                self.cursor.observe(_slice, current_record)
+            # TODO this is just the most recent record *read*, not necessarily the most recent record *within slice boundaries*; once all
+            # cursors implement a meaningful `observe` method, it can be removed, both from here and the `Cursor.close_slice` method args
+            most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, current_record, _slice)
             yield stream_data
         if self.cursor:
@@ -330,13 +336,13 @@ class SimpleRetriever(Retriever):
         return
     def _get_most_recent_record(
-        self, current_most_recent: Optional[Record], stream_data: StreamData, stream_slice: StreamSlice
+        self, current_most_recent: Optional[Record], current_record: Optional[Record], stream_slice: StreamSlice
     ) -> Optional[Record]:
-        if self.cursor and (record := self._extract_record(stream_data, stream_slice)):
+        if self.cursor and current_record:
             if not current_most_recent:
-                return record
+                return current_record
             else:
-                return current_most_recent if self.cursor.is_greater_than_or_equal(current_most_recent, record) else record
+                return current_most_recent if self.cursor.is_greater_than_or_equal(current_most_recent, current_record) else current_record
         else:
             return None

{airbyte_cdk-0.70.2.dist-info → airbyte_cdk-0.72.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 0.70.2
+Version: 0.72.0
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://github.com/airbytehq/airbyte
 Author: Airbyte

{airbyte_cdk-0.70.2.dist-info → airbyte_cdk-0.72.0.dist-info}/RECORD RENAMED Viewed

@@ -38,7 +38,7 @@ airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py
 airbyte_cdk/sources/concurrent_source/thread_pool_manager.py,sha256=hFj5rsRtORurl3fwH8GC9h6Uz2wbzBFOLWUxJ-YJ7J8,4801
 airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=An4UXx4e_GodeVd0bSQTv1G_Z1yjUcb7NbOmcC9-i9I,89327
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=netrMub3A9k9wk5VWx8vqDWhfeLk_sviHHJ8NXnH2OA,90111
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=9nBjSBilzH2aeJsUEqOLyc4G2RRjlPZCapHDMv4jnOU,6691
 airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
@@ -67,9 +67,9 @@ airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=-p9X6UV3iS
 airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=kH5DrBHr6DdpmGqWx4aFRXkprL-VGEHI5BcG3A-0Cjg,1394
 airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=m3IzOp_wo6QnQXQ3bpxROmHA0P_YeuPDIpBlWvyBXq0,4366
 airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=BYzVRQ4MSmhLCajgTi1Y_FHlwCBTdsMDT6zRmYMytws,425
-airbyte_cdk/sources/declarative/incremental/cursor.py,sha256=cGAYP-Std-_MNsX4KGIP4FwDff6WdECV7CMgMi1uKSg,2890
-airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=B-4CmFwv6zXDHYJ2NFe-Ct-n360YMNSZ4ruxGe8cuSg,15802
-airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=8Oq6pgbQjAlkQCPAgOoa-fK9Yb6iyx1oYqdsaMjamT0,12241
+airbyte_cdk/sources/declarative/incremental/cursor.py,sha256=KgKGGgVY_JJo4JHRafo5__61Xu3hVfTvDKoSSM6AmTA,3523
+airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=uLFSm5K8oBbbQXDrAYDHY_B55Nzwv27m4Qvgegqx5GM,18384
+airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=pXVty1WnW2YJ013yoApYgNBdEat47XjgKCMhLS7RkEg,12504
 airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
 airbyte_cdk/sources/declarative/interpolation/filters.py,sha256=V5XL-IEFNn08YdkJl4A54-G73qJ8P7WAQOYgf1-fXzQ,2809
 airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py,sha256=p5XbZB1cvP_U6HBBHX4PIFlXMHB9vdhSeZ5N3N8AuBY,1835
@@ -80,14 +80,14 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
 airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=hOLBs9VaaE5xsT2wY2VxSrISE165bu_Egb83ordG4XI,5379
 airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=u6i4nYufXZe27bIED7uhsvfhghOMN4NdWoGLEeTjzwk,61032
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=1a67m2fGAdBRf7rOzvk5SIluQHIWL4SPLnrjsnrnm_s,61574
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=5vOvMuyWlpALrOq2ehLxa7wO6tlFIlgUNtMYrMCKIjE,6092
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
 airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py,sha256=W8BcK4KOg4ifNXgsdeIoV4oneHjXBKcPHEZHIC4r-hM,3801
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=i2yUdrdlPUHI0dSQX0zBT8WSg912SMiCF8qkQ8VvlA4,8287
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=6ukHx0bBrCJm9rek1l_MEfS3U_gdJcM4pJRyifJEOp0,6412
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=_jX1sPW5SA51nHoxpoVktC3jdO8PR4cGrQfZftcVSLo,59316
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=1zI1Mk9k_3p_TMz6LsgV7U54CJ6etl88q8WOv4AZO-w,59499
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=27sOWhw2LBQs62HchURakHQ2M_mtnOatNgU6q8RUtpU,476
 airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=L22D-up7W2HahZZo2dA-IbRSs7qJEahU6O6bU0eiIt8,4324
 airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=cl-TQdu_m6_IF20gdD1jll0SpejIyMZHvyGXx2NafuI,1611
@@ -128,7 +128,7 @@ airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_
 airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py,sha256=mCdh3UoAZoLycm--JfWDxXcjMKI2j6bFkRZRdOz67xc,2602
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=IiHXDeKtibRqeWcRUckmSiXfk--u-sFMw3APWK8PCGQ,339
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=h3wI68k9NxYE39jMZPOzL72XYTcScFvVeev-DZ_nPoo,1753
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=1r-xK2D-yZPNpOKCaU9z500r5T96jWV7io-sjmh3QkQ,19204
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=yHciSsy1CK-NyfWRO4B_ySLF35itv6tkrCBaL_ZSAO4,19650
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=t0ll098cIG2Wr1rq1rZ3QDZ9WnScUuqAh42YVoTRWrU,1794
 airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
@@ -315,9 +315,9 @@ unit_tests/sources/declarative/extractors/test_dpath_extractor.py,sha256=-bgWKAi
 unit_tests/sources/declarative/extractors/test_record_filter.py,sha256=mcR6Zc3BoVDm_hkmx3J3zFShi2CdudqxR2U1JRxkgzA,2329
 unit_tests/sources/declarative/extractors/test_record_selector.py,sha256=06gLLRwom45YtdsKm9OUabpkioXSDKEnv0DsDTvItC4,6884
 unit_tests/sources/declarative/incremental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py,sha256=cvgBYxAJM-_w7ABOL66vZVPr7PBHtt7YGCALIeq9I9Q,36375
+unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py,sha256=R-QX1npdwzGShcxY3_Zrbm1jP0sxyZ8AcOe2O-vtWdo,38486
 unit_tests/sources/declarative/incremental/test_per_partition_cursor.py,sha256=Xj3vYxB2kbhOZkJ9p2MXuOLq0FwU17UoxM1vuRaN1_k,20402
-unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py,sha256=S4Vg2qf7RVRRxw5DkBjSOOFc_LCKUs7fZ8Qtf4WGB08,12519
+unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py,sha256=SeEb7jfXRPO7bbQVtm1eeo-JOR4PQXDkWbOA3fc89Kw,12897
 unit_tests/sources/declarative/interpolation/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 unit_tests/sources/declarative/interpolation/test_filters.py,sha256=gPGDNPeLu87rj1WcVVpLsosh6Dzgh0ihsyDkEIdGI_E,2388
 unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py,sha256=vr45nStygl09eNsFYN_YiaqcLBWse9OW_wMc7orHoHU,1804
@@ -329,7 +329,7 @@ unit_tests/sources/declarative/interpolation/test_macros.py,sha256=vEZmHQ0KsfQUz
 unit_tests/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py,sha256=egePHWYcXprfPtoHhiquWAXuJkDr-DB_RakKhdyaoHs,14316
 unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py,sha256=K3q9eyx-sJFQ8nGYjAgS7fxau4sX_FlNreEAjiCYOeE,5306
-unit_tests/sources/declarative/parsers/test_model_to_component_factory.py,sha256=WIUygDJvNGEIzNdlarkYymTTgRtiXlR6IOjwnsqlC3E,75683
+unit_tests/sources/declarative/parsers/test_model_to_component_factory.py,sha256=cDlDoNutC6JMGdyvkYMteiHtVrpQ_cKnRE_yn6dWui0,76426
 unit_tests/sources/declarative/parsers/testing_components.py,sha256=_yUijmYRM-yYHPGDB2JsfEiOuVrgexGW9QwHf1xxNW8,1326
 unit_tests/sources/declarative/partition_routers/__init__.py,sha256=O8MZg4Bv_DghdRy9BoJCPIqdV75VtiUrhEkExQgb2nE,61
 unit_tests/sources/declarative/partition_routers/test_list_partition_router.py,sha256=WKdbAQCHfCVOyoAFM_kbHsbqAF_e5FX5Zvou5ARsJZ4,6572
@@ -360,7 +360,7 @@ unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py,sha2
 unit_tests/sources/declarative/requesters/request_options/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
 unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py,sha256=bjcaTb8I37tBhs5b_FLRTLkDZAmKjGRywpcN4oGl-zI,5900
 unit_tests/sources/declarative/retrievers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
-unit_tests/sources/declarative/retrievers/test_simple_retriever.py,sha256=mDF2m-2hqTr7VkrdoIffE8vMaUU9sv5D4At0DVtgEJw,20273
+unit_tests/sources/declarative/retrievers/test_simple_retriever.py,sha256=TLd9k1GsV0kjl2rVvObUzoKWFcYW0ILBvOOJVTzqxZ4,20316
 unit_tests/sources/declarative/schema/__init__.py,sha256=i-iWyCqXPVgY-4miy16FH8U06gW_1_49AVq_8S8rVWY,134
 unit_tests/sources/declarative/schema/test_default_schema_loader.py,sha256=cWOFJnT9fhcEU6XLHkoe3E83mCjWc8lEttT0PFcvAm8,1091
 unit_tests/sources/declarative/schema/test_inline_schema_loader.py,sha256=vDJauhZ8og8M9ZqKDbf12SSYSfhUZ0_LmH7zjJHCHwI,517
@@ -459,8 +459,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
 unit_tests/utils/test_secret_utils.py,sha256=CdKK8A2-5XVxbXVtX22FK9dwwMeP5KNqDH6luWRXSNw,5256
 unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
 unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
-airbyte_cdk-0.70.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-0.70.2.dist-info/METADATA,sha256=2B0x5Y9M3ZyRPwYmXjynUjYCeFnTldJIPQCAcAuqYMs,11074
-airbyte_cdk-0.70.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-airbyte_cdk-0.70.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
-airbyte_cdk-0.70.2.dist-info/RECORD,,
+airbyte_cdk-0.72.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-0.72.0.dist-info/METADATA,sha256=lDN4hbkJHUsXxycTKKTDAgCzzo72JYBIzlCOZzuU0nM,11074
+airbyte_cdk-0.72.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+airbyte_cdk-0.72.0.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
+airbyte_cdk-0.72.0.dist-info/RECORD,,

unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py CHANGED Viewed

@@ -338,55 +338,96 @@ def test_stream_slices(
 @pytest.mark.parametrize(
-    "test_name, previous_cursor, stream_slice, latest_record_data, expected_state",
+    "test_name, previous_cursor, stream_slice, observed_records, expected_state",
     [
         (
             "test_close_slice_previous_cursor_is_highest",
             "2023-01-01",
-            StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
-            {cursor_field: "2021-01-01"},
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [{cursor_field: "2021-01-01"}],
             {cursor_field: "2023-01-01"},
         ),
         (
             "test_close_slice_stream_slice_partition_end_is_highest",
-            "2021-01-01",
-            StreamSlice(partition={}, cursor_slice={"end_time": "2023-01-01"}),
+            "2020-01-01",
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2023-01-01"}),
+            [{cursor_field: "2021-01-01"}],
             {cursor_field: "2021-01-01"},
-            {cursor_field: "2023-01-01"},
         ),
         (
-            "test_close_slice_latest_record_cursor_value_is_highest",
+            "test_close_slice_latest_record_cursor_value_is_higher_than_slice_end",
             "2021-01-01",
-            StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
-            {cursor_field: "2023-01-01"},
-            {cursor_field: "2023-01-01"},
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [{cursor_field: "2023-01-01"}],
+            {cursor_field: "2021-01-01"},
         ),
         (
-            "test_close_slice_without_latest_record",
+            "test_close_slice_with_no_records_observed",
             "2021-01-01",
-            StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [],
+            {cursor_field: "2021-01-01"},
+        ),
+        (
+            "test_close_slice_with_no_records_observed_and_no_previous_state",
             None,
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [],
+            {},
+        ),
+        (
+            "test_close_slice_without_previous_cursor",
+            None,
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2023-01-01"}),
+            [{cursor_field: "2022-01-01"}],
             {cursor_field: "2022-01-01"},
         ),
         (
-            "test_close_slice_without_cursor",
+            "test_close_slice_with_out_of_order_records",
+            "2021-01-01",
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [{cursor_field: "2021-04-01"}, {cursor_field: "2021-02-01"}, {cursor_field: "2021-03-01"}],
+            {cursor_field: "2021-04-01"},
+        ),
+        (
+            "test_close_slice_with_some_records_out_of_slice_boundaries",
+            "2021-01-01",
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [{cursor_field: "2021-02-01"}, {cursor_field: "2021-03-01"}, {cursor_field: "2023-01-01"}],
+            {cursor_field: "2021-03-01"},
+        ),
+        (
+            "test_close_slice_with_all_records_out_of_slice_boundaries",
+            "2021-01-01",
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [{cursor_field: "2023-01-01"}],
+            {cursor_field: "2021-01-01"},
+        ),
+        (
+            "test_close_slice_with_all_records_out_of_slice_and_no_previous_cursor",
             None,
-            StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
-            {cursor_field: "2023-01-01"},
-            {cursor_field: "2023-01-01"},
+            StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
+            [{cursor_field: "2023-01-01"}],
+            {},
         ),
     ],
 )
-def test_close_slice(test_name, previous_cursor, stream_slice, latest_record_data, expected_state):
+def test_close_slice(test_name, previous_cursor, stream_slice, observed_records, expected_state):
     cursor = DatetimeBasedCursor(
         start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
         cursor_field=InterpolatedString(string=cursor_field, parameters={}),
         datetime_format="%Y-%m-%d",
         config=config,
         parameters={},
+        partition_field_start="start_time",
+        partition_field_end="end_time",
     )
-    cursor._cursor = previous_cursor
-    cursor.close_slice(stream_slice, Record(latest_record_data, stream_slice) if latest_record_data else None)
+    cursor.set_initial_state({cursor_field: previous_cursor})
+    for record_data in observed_records:
+        record = Record(record_data, stream_slice)
+        cursor.observe(stream_slice, record)
+    last_record = observed_records[-1] if observed_records else None
+    cursor.close_slice(stream_slice, Record(last_record, stream_slice) if last_record else None)
     updated_state = cursor.get_stream_state()
     assert updated_state == expected_state
@@ -404,37 +445,42 @@ def test_close_slice_fails_if_slice_has_a_partition():
         cursor.close_slice(stream_slice, Record({"id": 1}, stream_slice))
-def test_given_different_format_and_slice_is_highest_when_close_slice_then_slice_datetime_format():
+def test_compares_cursor_values_by_chronological_order():
     cursor = DatetimeBasedCursor(
         start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
         cursor_field=cursor_field,
-        datetime_format="%Y-%m-%dT%H:%M:%S.%fZ",
-        cursor_datetime_formats=["%Y-%m-%d"],
+        datetime_format="%d-%m-%Y",
         config=config,
         parameters={},
     )
-    _slice = StreamSlice(partition={}, cursor_slice={"end_time": "2023-01-04T17:30:19.000Z"})
-    record_cursor_value = "2023-01-03"
-    cursor.close_slice(_slice, Record({cursor_field: record_cursor_value}, _slice))
+    _slice = StreamSlice(partition={}, cursor_slice={"start_time": "01-01-2023", "end_time": "01-04-2023"})
+    first_record = Record({cursor_field: "21-02-2023"}, _slice)
+    cursor.observe(_slice, first_record)
+    second_record = Record({cursor_field: "01-03-2023"}, _slice)
+    cursor.observe(_slice, second_record)
+    cursor.close_slice(_slice, second_record)
-    assert cursor.get_stream_state()[cursor_field] == "2023-01-04T17:30:19.000Z"
+    assert cursor.get_stream_state()[cursor_field] == "01-03-2023"
-def test_given_partition_end_is_specified_and_greater_than_record_when_close_slice_then_use_partition_end():
-    partition_field_end = "partition_field_end"
+def test_given_different_format_and_slice_is_highest_when_close_slice_then_state_uses_record_format():
     cursor = DatetimeBasedCursor(
         start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
-        cursor_field=InterpolatedString(string=cursor_field, parameters={}),
-        datetime_format="%Y-%m-%d",
-        partition_field_end=partition_field_end,
+        cursor_field=cursor_field,
+        datetime_format="%Y-%m-%dT%H:%M:%S.%fZ",
+        cursor_datetime_formats=["%Y-%m-%d"],
         config=config,
         parameters={},
     )
-    stream_slice = StreamSlice(partition={}, cursor_slice={partition_field_end: "2025-01-01"})
-    cursor.close_slice(stream_slice, Record({cursor_field: "2020-01-01"}, stream_slice))
-    updated_state = cursor.get_stream_state()
-    assert {cursor_field: "2025-01-01"} == updated_state
+    _slice = StreamSlice(partition={}, cursor_slice={"start_time": "2023-01-01T17:30:19.000Z", "end_time": "2023-01-04T17:30:19.000Z"})
+    record_cursor_value = "2023-01-03"
+    record = Record({cursor_field: record_cursor_value}, _slice)
+    cursor.observe(_slice, record)
+    cursor.close_slice(_slice, record)
+    assert cursor.get_stream_state()[cursor_field] == "2023-01-03"
 @pytest.mark.parametrize(

unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py CHANGED Viewed

@@ -200,14 +200,14 @@ def test_given_record_for_partition_when_read_then_update_state():
         "states": [
             {
                 "partition": {"partition_field": "1"},
-                "cursor": {CURSOR_FIELD: "2022-01-31"},
+                "cursor": {CURSOR_FIELD: "2022-01-15"},
             }
         ]
     }
 def test_substream_without_input_state():
-    source = ManifestDeclarativeSource(
+    test_source = ManifestDeclarativeSource(
         source_config=ManifestBuilder()
         .with_substream_partition_router("AnotherStream")
         .with_incremental_sync(
@@ -231,14 +231,14 @@ def test_substream_without_input_state():
         .build()
     )
-    stream_instance = source.streams({})[1]
+    stream_instance = test_source.streams({})[1]
     stream_slice = StreamSlice(partition={"parent_id": "1"},
                                cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"})
     with patch.object(
             SimpleRetriever, "_read_pages", side_effect=[[Record({"id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
-                                                         Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]
+                                                         [Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]]
     ):
         slices = list(stream_instance.stream_slices(sync_mode=SYNC_MODE))
         assert list(slices) == [
@@ -246,6 +246,10 @@ def test_substream_without_input_state():
                         cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"}),
             StreamSlice(partition={"parent_id": "1", "parent_slice": {}, },
                         cursor_slice={"start_time": "2022-02-01", "end_time": "2022-02-28"}),
+            StreamSlice(partition={"parent_id": "2", "parent_slice": {}, },
+                        cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"}),
+            StreamSlice(partition={"parent_id": "2", "parent_slice": {}, },
+                        cursor_slice={"start_time": "2022-02-01", "end_time": "2022-02-28"}),
         ]
@@ -307,7 +311,7 @@ def test_substream_with_legacy_input_state():
     with patch.object(
             SimpleRetriever, "_read_pages", side_effect=[
                 [Record({"id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
-                [Record({"parent_id": "1"}, stream_slice)],
+                [Record({"parent_id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
                 [Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
                 [Record({"parent_id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]
             ]
@@ -319,7 +323,7 @@ def test_substream_with_legacy_input_state():
         expected_state = {"states": [
             {
                 "cursor": {
-                    "cursor_field": "2022-01-31"
+                    CURSOR_FIELD: "2022-01-15"
                 },
                 "partition": {"parent_id": "1", "parent_slice": {}}
             }

unit_tests/sources/declarative/parsers/test_model_to_component_factory.py CHANGED Viewed

@@ -5,6 +5,7 @@
 # mypy: ignore-errors
 import datetime
+from typing import Any, Mapping
 import pytest
 from airbyte_cdk.models import Level
@@ -27,6 +28,7 @@ from airbyte_cdk.sources.declarative.models import CheckStream as CheckStreamMod
 from airbyte_cdk.sources.declarative.models import CompositeErrorHandler as CompositeErrorHandlerModel
 from airbyte_cdk.sources.declarative.models import CustomErrorHandler as CustomErrorHandlerModel
 from airbyte_cdk.sources.declarative.models import CustomPartitionRouter as CustomPartitionRouterModel
+from airbyte_cdk.sources.declarative.models import CustomSchemaLoader as CustomSchemaLoaderModel
 from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor as DatetimeBasedCursorModel
 from airbyte_cdk.sources.declarative.models import DeclarativeStream as DeclarativeStreamModel
 from airbyte_cdk.sources.declarative.models import DefaultPaginator as DefaultPaginatorModel
@@ -66,6 +68,7 @@ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
 from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
 from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, SimpleRetrieverTestReadDecorator
 from airbyte_cdk.sources.declarative.schema import JsonFileSchemaLoader
+from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
 from airbyte_cdk.sources.declarative.spec import Spec
 from airbyte_cdk.sources.declarative.stream_slicers import CartesianProductStreamSlicer
 from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields
@@ -1820,3 +1823,19 @@ def test_create_offset_increment():
     assert strategy.page_size == expected_strategy.page_size
     assert strategy.inject_on_first_request == expected_strategy.inject_on_first_request
     assert strategy.config == input_config
+class MyCustomSchemaLoader(SchemaLoader):
+    def get_json_schema(self) -> Mapping[str, Any]:
+        """Returns a mapping describing the stream's schema"""
+        return {}
+def test_create_custom_schema_loader():
+    definition = {
+        "type": "CustomSchemaLoader",
+        "class_name": "unit_tests.sources.declarative.parsers.test_model_to_component_factory.MyCustomSchemaLoader"
+    }
+    component = factory.create_component(CustomSchemaLoaderModel, definition, {})
+    assert isinstance(component, MyCustomSchemaLoader)

unit_tests/sources/declarative/retrievers/test_simple_retriever.py CHANGED Viewed

@@ -477,6 +477,7 @@ def test_given_stream_data_is_not_record_when_read_records_then_update_slice_wit
         side_effect=retriever_read_pages,
     ):
         list(retriever.read_records(stream_slice=stream_slice, records_schema={}))
+        cursor.observe.assert_not_called()
         cursor.close_slice.assert_called_once_with(stream_slice, None)

{airbyte_cdk-0.70.2.dist-info → airbyte_cdk-0.72.0.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{airbyte_cdk-0.70.2.dist-info → airbyte_cdk-0.72.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{airbyte_cdk-0.70.2.dist-info → airbyte_cdk-0.72.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

airbyte-cdk 0.70.2__py3-none-any.whl → 0.72.0__py3-none-any.whl

airbyte-cdk 0.70.2__py3-none-any.whl → 0.72.0__py3-none-any.whl