airbyte-cdk 0.70.2__py3-none-any.whl → 0.72.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -602,6 +602,27 @@ definitions:
602
602
  $parameters:
603
603
  type: object
604
604
  additionalProperties: true
605
+ CustomSchemaLoader:
606
+ title: Custom Schema Loader
607
+ description: Schema Loader component whose behavior is derived from a custom code implementation of the connector.
608
+ type: object
609
+ additionalProperties: true
610
+ required:
611
+ - type
612
+ - class_name
613
+ properties:
614
+ type:
615
+ type: string
616
+ enum: [CustomSchemaLoader]
617
+ class_name:
618
+ title: Class Name
619
+ description: Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_<name>.<package>.<class_name>`.
620
+ type: string
621
+ examples:
622
+ - "source_railz.components.MyCustomSchemaLoader"
623
+ $parameters:
624
+ type: object
625
+ additionalProperties: true
605
626
  CustomTransformation:
606
627
  title: Custom Transformation
607
628
  description: Transformation component whose behavior is derived from a custom code implementation of the connector.
@@ -948,6 +969,7 @@ definitions:
948
969
  anyOf:
949
970
  - "$ref": "#/definitions/InlineSchemaLoader"
950
971
  - "$ref": "#/definitions/JsonFileSchemaLoader"
972
+ - "$ref": "#/definitions/CustomSchemaLoader"
951
973
  # TODO we have move the transformation to the RecordSelector level in the code but kept this here for
952
974
  # compatibility reason. We should eventually move this to align with the code.
953
975
  transformations:
@@ -24,18 +24,28 @@ class Cursor(ABC, StreamSlicer):
24
24
  :param stream_state: The state of the stream as returned by get_stream_state
25
25
  """
26
26
 
27
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
28
+ """
29
+ Register a record with the cursor; the cursor instance can then use it to manage the state of the in-progress stream read.
30
+
31
+ :param stream_slice: The current slice, which may or may not contain the most recently observed record
32
+ :param record: the most recently-read record, which the cursor can use to update the stream state. Outwardly-visible changes to the
33
+ stream state may need to be deferred depending on whether the source reliably orders records by the cursor field.
34
+ """
35
+ pass
36
+
27
37
  @abstractmethod
28
38
  def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
29
39
  """
30
40
  Update state based on the stream slice and the latest record. Note that `stream_slice.cursor_slice` and
31
- `last_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be leveraged to
41
+ `most_recent_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be leveraged to
32
42
  update the state.
33
43
 
34
44
  :param stream_slice: slice to close
35
- :param last_record: the latest record we have received for the slice. This is important to consider because even if the cursor emits
36
- a slice, some APIs are not able to enforce the upper boundary. The outcome is that the last_record might have a higher cursor
37
- value than the slice upper boundary and if we want to reduce the duplication as much as possible, we need to consider the highest
38
- value between the internal cursor, the stream slice upper boundary and the record cursor value.
45
+ :param most_recent_record: the latest record we have received for the slice. This is important to consider because even if the
46
+ cursor emits a slice, some APIs are not able to enforce the upper boundary. The outcome is that the last_record might have a
47
+ higher cursor value than the slice upper boundary and if we want to reduce the duplication as much as possible, we need to
48
+ consider the highest value between the internal cursor, the stream slice upper boundary and the record cursor value.
39
49
  """
40
50
 
41
51
  @abstractmethod
@@ -52,7 +52,12 @@ class DatetimeBasedCursor(Cursor):
52
52
  datetime_format: str
53
53
  config: Config
54
54
  parameters: InitVar[Mapping[str, Any]]
55
- _cursor: Optional[str] = field(repr=False, default=None) # tracks current datetime
55
+ _highest_observed_cursor_field_value: Optional[str] = field(
56
+ repr=False, default=None
57
+ ) # tracks the latest observed datetime, which may not be safe to emit in the case of out-of-order records
58
+ _cursor: Optional[str] = field(
59
+ repr=False, default=None
60
+ ) # tracks the latest observed datetime that is appropriate to emit as stream state
56
61
  end_datetime: Optional[Union[MinMaxDatetime, str]] = None
57
62
  step: Optional[Union[InterpolatedString, str]] = None
58
63
  cursor_granularity: Optional[str] = None
@@ -109,20 +114,39 @@ class DatetimeBasedCursor(Cursor):
109
114
  """
110
115
  self._cursor = stream_state.get(self._cursor_field.eval(self.config)) if stream_state else None
111
116
 
112
- def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
117
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
118
+ """
119
+ Register a record with the cursor; the cursor instance can then use it to manage the state of the in-progress stream read.
120
+
121
+ :param stream_slice: The current slice, which may or may not contain the most recently observed record
122
+ :param record: the most recently-read record, which the cursor can use to update the stream state. Outwardly-visible changes to the
123
+ stream state may need to be deferred depending on whether the source reliably orders records by the cursor field.
124
+ """
125
+ record_cursor_value = record.get(self._cursor_field.eval(self.config))
126
+ # if the current record has no cursor value, we cannot meaningfully update the state based on it, so there is nothing more to do
127
+ if not record_cursor_value:
128
+ return
129
+
130
+ start_field = self._partition_field_start.eval(self.config)
131
+ end_field = self._partition_field_end.eval(self.config)
132
+ is_highest_observed_cursor_value = not self._highest_observed_cursor_field_value or self.parse_date(
133
+ record_cursor_value
134
+ ) > self.parse_date(self._highest_observed_cursor_field_value)
135
+ if (
136
+ self._is_within_daterange_boundaries(record, stream_slice.get(start_field), stream_slice.get(end_field)) # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date
137
+ and is_highest_observed_cursor_value
138
+ ):
139
+ self._highest_observed_cursor_field_value = record_cursor_value
140
+
141
+ def close_slice(self, stream_slice: StreamSlice, _most_recent_record: Optional[Record]) -> None:
113
142
  if stream_slice.partition:
114
143
  raise ValueError(f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}.")
115
- last_record_cursor_value = most_recent_record.get(self._cursor_field.eval(self.config)) if most_recent_record else None
116
- stream_slice_value_end = stream_slice.get(self._partition_field_end.eval(self.config))
117
- potential_cursor_values = [
118
- cursor_value for cursor_value in [self._cursor, last_record_cursor_value, stream_slice_value_end] if cursor_value
119
- ]
120
144
  cursor_value_str_by_cursor_value_datetime = dict(
121
145
  map(
122
146
  # we need to ensure the cursor value is preserved as is in the state else the CATs might complain of something like
123
147
  # 2023-01-04T17:30:19.000Z' <= '2023-01-04T17:30:19.000000Z'
124
- lambda datetime_str: (self.parse_date(datetime_str), datetime_str),
125
- potential_cursor_values,
148
+ lambda datetime_str: (self.parse_date(datetime_str), datetime_str), # type: ignore # because of the filter on the next line, this will only be called with a str
149
+ filter(lambda item: item, [self._cursor, self._highest_observed_cursor_field_value]),
126
150
  )
127
151
  )
128
152
  self._cursor = (
@@ -279,10 +303,26 @@ class DatetimeBasedCursor(Cursor):
279
303
  f"Could not find cursor field `{cursor_field}` in record. The incremental sync will assume it needs to be synced",
280
304
  )
281
305
  return True
282
-
283
306
  latest_possible_cursor_value = self._select_best_end_datetime()
284
307
  earliest_possible_cursor_value = self._calculate_earliest_possible_value(latest_possible_cursor_value)
285
- return earliest_possible_cursor_value <= self.parse_date(record_cursor_value) <= latest_possible_cursor_value
308
+ return self._is_within_daterange_boundaries(record, earliest_possible_cursor_value, latest_possible_cursor_value)
309
+
310
+ def _is_within_daterange_boundaries(
311
+ self, record: Record, start_datetime_boundary: Union[datetime.datetime, str], end_datetime_boundary: Union[datetime.datetime, str]
312
+ ) -> bool:
313
+ cursor_field = self._cursor_field.eval(self.config)
314
+ record_cursor_value = record.get(cursor_field)
315
+ if not record_cursor_value:
316
+ self._send_log(
317
+ Level.WARN,
318
+ f"Could not find cursor field `{cursor_field}` in record. The record will not be considered when emitting sync state",
319
+ )
320
+ return False
321
+ if isinstance(start_datetime_boundary, str):
322
+ start_datetime_boundary = self.parse_date(start_datetime_boundary)
323
+ if isinstance(end_datetime_boundary, str):
324
+ end_datetime_boundary = self.parse_date(end_datetime_boundary)
325
+ return start_datetime_boundary <= self.parse_date(record_cursor_value) <= end_datetime_boundary
286
326
 
287
327
  def _send_log(self, level: Level, message: str) -> None:
288
328
  if self.message_repository:
@@ -86,6 +86,11 @@ class PerPartitionCursor(Cursor):
86
86
  for state in stream_state["states"]:
87
87
  self._cursor_per_partition[self._to_partition_key(state["partition"])] = self._create_cursor(state["cursor"])
88
88
 
89
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
90
+ self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].observe(
91
+ StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record
92
+ )
93
+
89
94
  def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
90
95
  try:
91
96
  cursor_most_recent_record = (
@@ -208,6 +208,20 @@ class CustomPartitionRouter(BaseModel):
208
208
  parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters')
209
209
 
210
210
 
211
+ class CustomSchemaLoader(BaseModel):
212
+ class Config:
213
+ extra = Extra.allow
214
+
215
+ type: Literal['CustomSchemaLoader']
216
+ class_name: str = Field(
217
+ ...,
218
+ description='Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_<name>.<package>.<class_name>`.',
219
+ examples=['source_railz.components.MyCustomSchemaLoader'],
220
+ title='Class Name',
221
+ )
222
+ parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters')
223
+
224
+
211
225
  class CustomTransformation(BaseModel):
212
226
  class Config:
213
227
  extra = Extra.allow
@@ -1161,7 +1175,9 @@ class DeclarativeStream(BaseModel):
1161
1175
  primary_key: Optional[PrimaryKey] = Field(
1162
1176
  '', description='The primary key of the stream.', title='Primary Key'
1163
1177
  )
1164
- schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader]] = Field(
1178
+ schema_loader: Optional[
1179
+ Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]
1180
+ ] = Field(
1165
1181
  None,
1166
1182
  description='Component used to retrieve the schema for the current stream.',
1167
1183
  title='Schema Loader',
@@ -49,6 +49,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
49
49
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRecordFilter as CustomRecordFilterModel
50
50
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRequester as CustomRequesterModel
51
51
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRetriever as CustomRetrieverModel
52
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomSchemaLoader as CustomSchemaLoader
52
53
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomTransformation as CustomTransformationModel
53
54
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import DatetimeBasedCursor as DatetimeBasedCursorModel
54
55
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import DeclarativeStream as DeclarativeStreamModel
@@ -165,6 +166,7 @@ class ModelToComponentFactory:
165
166
  CustomRecordFilterModel: self.create_custom_component,
166
167
  CustomRequesterModel: self.create_custom_component,
167
168
  CustomRetrieverModel: self.create_custom_component,
169
+ CustomSchemaLoader: self.create_custom_component,
168
170
  CustomPaginationStrategyModel: self.create_custom_component,
169
171
  CustomPartitionRouterModel: self.create_custom_component,
170
172
  CustomTransformationModel: self.create_custom_component,
@@ -322,7 +322,13 @@ class SimpleRetriever(Retriever):
322
322
  records_schema=records_schema,
323
323
  )
324
324
  for stream_data in self._read_pages(record_generator, self.state, _slice):
325
- most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, stream_data, _slice)
325
+ current_record = self._extract_record(stream_data, _slice)
326
+ if self.cursor and current_record:
327
+ self.cursor.observe(_slice, current_record)
328
+
329
+ # TODO this is just the most recent record *read*, not necessarily the most recent record *within slice boundaries*; once all
330
+ # cursors implement a meaningful `observe` method, it can be removed, both from here and the `Cursor.close_slice` method args
331
+ most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, current_record, _slice)
326
332
  yield stream_data
327
333
 
328
334
  if self.cursor:
@@ -330,13 +336,13 @@ class SimpleRetriever(Retriever):
330
336
  return
331
337
 
332
338
  def _get_most_recent_record(
333
- self, current_most_recent: Optional[Record], stream_data: StreamData, stream_slice: StreamSlice
339
+ self, current_most_recent: Optional[Record], current_record: Optional[Record], stream_slice: StreamSlice
334
340
  ) -> Optional[Record]:
335
- if self.cursor and (record := self._extract_record(stream_data, stream_slice)):
341
+ if self.cursor and current_record:
336
342
  if not current_most_recent:
337
- return record
343
+ return current_record
338
344
  else:
339
- return current_most_recent if self.cursor.is_greater_than_or_equal(current_most_recent, record) else record
345
+ return current_most_recent if self.cursor.is_greater_than_or_equal(current_most_recent, current_record) else current_record
340
346
  else:
341
347
  return None
342
348
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.70.2
3
+ Version: 0.72.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -38,7 +38,7 @@ airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py
38
38
  airbyte_cdk/sources/concurrent_source/thread_pool_manager.py,sha256=hFj5rsRtORurl3fwH8GC9h6Uz2wbzBFOLWUxJ-YJ7J8,4801
39
39
  airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
40
40
  airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
41
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=An4UXx4e_GodeVd0bSQTv1G_Z1yjUcb7NbOmcC9-i9I,89327
41
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=netrMub3A9k9wk5VWx8vqDWhfeLk_sviHHJ8NXnH2OA,90111
42
42
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
43
43
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=9nBjSBilzH2aeJsUEqOLyc4G2RRjlPZCapHDMv4jnOU,6691
44
44
  airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
@@ -67,9 +67,9 @@ airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=-p9X6UV3iS
67
67
  airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=kH5DrBHr6DdpmGqWx4aFRXkprL-VGEHI5BcG3A-0Cjg,1394
68
68
  airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=m3IzOp_wo6QnQXQ3bpxROmHA0P_YeuPDIpBlWvyBXq0,4366
69
69
  airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=BYzVRQ4MSmhLCajgTi1Y_FHlwCBTdsMDT6zRmYMytws,425
70
- airbyte_cdk/sources/declarative/incremental/cursor.py,sha256=cGAYP-Std-_MNsX4KGIP4FwDff6WdECV7CMgMi1uKSg,2890
71
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=B-4CmFwv6zXDHYJ2NFe-Ct-n360YMNSZ4ruxGe8cuSg,15802
72
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=8Oq6pgbQjAlkQCPAgOoa-fK9Yb6iyx1oYqdsaMjamT0,12241
70
+ airbyte_cdk/sources/declarative/incremental/cursor.py,sha256=KgKGGgVY_JJo4JHRafo5__61Xu3hVfTvDKoSSM6AmTA,3523
71
+ airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=uLFSm5K8oBbbQXDrAYDHY_B55Nzwv27m4Qvgegqx5GM,18384
72
+ airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=pXVty1WnW2YJ013yoApYgNBdEat47XjgKCMhLS7RkEg,12504
73
73
  airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
74
74
  airbyte_cdk/sources/declarative/interpolation/filters.py,sha256=V5XL-IEFNn08YdkJl4A54-G73qJ8P7WAQOYgf1-fXzQ,2809
75
75
  airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py,sha256=p5XbZB1cvP_U6HBBHX4PIFlXMHB9vdhSeZ5N3N8AuBY,1835
@@ -80,14 +80,14 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
80
80
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=hOLBs9VaaE5xsT2wY2VxSrISE165bu_Egb83ordG4XI,5379
81
81
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
82
82
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
83
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=u6i4nYufXZe27bIED7uhsvfhghOMN4NdWoGLEeTjzwk,61032
83
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=1a67m2fGAdBRf7rOzvk5SIluQHIWL4SPLnrjsnrnm_s,61574
84
84
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
85
85
  airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=5vOvMuyWlpALrOq2ehLxa7wO6tlFIlgUNtMYrMCKIjE,6092
86
86
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
87
87
  airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py,sha256=W8BcK4KOg4ifNXgsdeIoV4oneHjXBKcPHEZHIC4r-hM,3801
88
88
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=i2yUdrdlPUHI0dSQX0zBT8WSg912SMiCF8qkQ8VvlA4,8287
89
89
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=6ukHx0bBrCJm9rek1l_MEfS3U_gdJcM4pJRyifJEOp0,6412
90
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=_jX1sPW5SA51nHoxpoVktC3jdO8PR4cGrQfZftcVSLo,59316
90
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=1zI1Mk9k_3p_TMz6LsgV7U54CJ6etl88q8WOv4AZO-w,59499
91
91
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=27sOWhw2LBQs62HchURakHQ2M_mtnOatNgU6q8RUtpU,476
92
92
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=L22D-up7W2HahZZo2dA-IbRSs7qJEahU6O6bU0eiIt8,4324
93
93
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=cl-TQdu_m6_IF20gdD1jll0SpejIyMZHvyGXx2NafuI,1611
@@ -128,7 +128,7 @@ airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_
128
128
  airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py,sha256=mCdh3UoAZoLycm--JfWDxXcjMKI2j6bFkRZRdOz67xc,2602
129
129
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=IiHXDeKtibRqeWcRUckmSiXfk--u-sFMw3APWK8PCGQ,339
130
130
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=h3wI68k9NxYE39jMZPOzL72XYTcScFvVeev-DZ_nPoo,1753
131
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=1r-xK2D-yZPNpOKCaU9z500r5T96jWV7io-sjmh3QkQ,19204
131
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=yHciSsy1CK-NyfWRO4B_ySLF35itv6tkrCBaL_ZSAO4,19650
132
132
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
133
133
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=t0ll098cIG2Wr1rq1rZ3QDZ9WnScUuqAh42YVoTRWrU,1794
134
134
  airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
@@ -315,9 +315,9 @@ unit_tests/sources/declarative/extractors/test_dpath_extractor.py,sha256=-bgWKAi
315
315
  unit_tests/sources/declarative/extractors/test_record_filter.py,sha256=mcR6Zc3BoVDm_hkmx3J3zFShi2CdudqxR2U1JRxkgzA,2329
316
316
  unit_tests/sources/declarative/extractors/test_record_selector.py,sha256=06gLLRwom45YtdsKm9OUabpkioXSDKEnv0DsDTvItC4,6884
317
317
  unit_tests/sources/declarative/incremental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
318
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py,sha256=cvgBYxAJM-_w7ABOL66vZVPr7PBHtt7YGCALIeq9I9Q,36375
318
+ unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py,sha256=R-QX1npdwzGShcxY3_Zrbm1jP0sxyZ8AcOe2O-vtWdo,38486
319
319
  unit_tests/sources/declarative/incremental/test_per_partition_cursor.py,sha256=Xj3vYxB2kbhOZkJ9p2MXuOLq0FwU17UoxM1vuRaN1_k,20402
320
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py,sha256=S4Vg2qf7RVRRxw5DkBjSOOFc_LCKUs7fZ8Qtf4WGB08,12519
320
+ unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py,sha256=SeEb7jfXRPO7bbQVtm1eeo-JOR4PQXDkWbOA3fc89Kw,12897
321
321
  unit_tests/sources/declarative/interpolation/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
322
322
  unit_tests/sources/declarative/interpolation/test_filters.py,sha256=gPGDNPeLu87rj1WcVVpLsosh6Dzgh0ihsyDkEIdGI_E,2388
323
323
  unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py,sha256=vr45nStygl09eNsFYN_YiaqcLBWse9OW_wMc7orHoHU,1804
@@ -329,7 +329,7 @@ unit_tests/sources/declarative/interpolation/test_macros.py,sha256=vEZmHQ0KsfQUz
329
329
  unit_tests/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
330
330
  unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py,sha256=egePHWYcXprfPtoHhiquWAXuJkDr-DB_RakKhdyaoHs,14316
331
331
  unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py,sha256=K3q9eyx-sJFQ8nGYjAgS7fxau4sX_FlNreEAjiCYOeE,5306
332
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py,sha256=WIUygDJvNGEIzNdlarkYymTTgRtiXlR6IOjwnsqlC3E,75683
332
+ unit_tests/sources/declarative/parsers/test_model_to_component_factory.py,sha256=cDlDoNutC6JMGdyvkYMteiHtVrpQ_cKnRE_yn6dWui0,76426
333
333
  unit_tests/sources/declarative/parsers/testing_components.py,sha256=_yUijmYRM-yYHPGDB2JsfEiOuVrgexGW9QwHf1xxNW8,1326
334
334
  unit_tests/sources/declarative/partition_routers/__init__.py,sha256=O8MZg4Bv_DghdRy9BoJCPIqdV75VtiUrhEkExQgb2nE,61
335
335
  unit_tests/sources/declarative/partition_routers/test_list_partition_router.py,sha256=WKdbAQCHfCVOyoAFM_kbHsbqAF_e5FX5Zvou5ARsJZ4,6572
@@ -360,7 +360,7 @@ unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py,sha2
360
360
  unit_tests/sources/declarative/requesters/request_options/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
361
361
  unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py,sha256=bjcaTb8I37tBhs5b_FLRTLkDZAmKjGRywpcN4oGl-zI,5900
362
362
  unit_tests/sources/declarative/retrievers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
363
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py,sha256=mDF2m-2hqTr7VkrdoIffE8vMaUU9sv5D4At0DVtgEJw,20273
363
+ unit_tests/sources/declarative/retrievers/test_simple_retriever.py,sha256=TLd9k1GsV0kjl2rVvObUzoKWFcYW0ILBvOOJVTzqxZ4,20316
364
364
  unit_tests/sources/declarative/schema/__init__.py,sha256=i-iWyCqXPVgY-4miy16FH8U06gW_1_49AVq_8S8rVWY,134
365
365
  unit_tests/sources/declarative/schema/test_default_schema_loader.py,sha256=cWOFJnT9fhcEU6XLHkoe3E83mCjWc8lEttT0PFcvAm8,1091
366
366
  unit_tests/sources/declarative/schema/test_inline_schema_loader.py,sha256=vDJauhZ8og8M9ZqKDbf12SSYSfhUZ0_LmH7zjJHCHwI,517
@@ -459,8 +459,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
459
459
  unit_tests/utils/test_secret_utils.py,sha256=CdKK8A2-5XVxbXVtX22FK9dwwMeP5KNqDH6luWRXSNw,5256
460
460
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
461
461
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
462
- airbyte_cdk-0.70.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
463
- airbyte_cdk-0.70.2.dist-info/METADATA,sha256=2B0x5Y9M3ZyRPwYmXjynUjYCeFnTldJIPQCAcAuqYMs,11074
464
- airbyte_cdk-0.70.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
465
- airbyte_cdk-0.70.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
466
- airbyte_cdk-0.70.2.dist-info/RECORD,,
462
+ airbyte_cdk-0.72.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
463
+ airbyte_cdk-0.72.0.dist-info/METADATA,sha256=lDN4hbkJHUsXxycTKKTDAgCzzo72JYBIzlCOZzuU0nM,11074
464
+ airbyte_cdk-0.72.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
465
+ airbyte_cdk-0.72.0.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
466
+ airbyte_cdk-0.72.0.dist-info/RECORD,,
@@ -338,55 +338,96 @@ def test_stream_slices(
338
338
 
339
339
 
340
340
  @pytest.mark.parametrize(
341
- "test_name, previous_cursor, stream_slice, latest_record_data, expected_state",
341
+ "test_name, previous_cursor, stream_slice, observed_records, expected_state",
342
342
  [
343
343
  (
344
344
  "test_close_slice_previous_cursor_is_highest",
345
345
  "2023-01-01",
346
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
347
- {cursor_field: "2021-01-01"},
346
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
347
+ [{cursor_field: "2021-01-01"}],
348
348
  {cursor_field: "2023-01-01"},
349
349
  ),
350
350
  (
351
351
  "test_close_slice_stream_slice_partition_end_is_highest",
352
- "2021-01-01",
353
- StreamSlice(partition={}, cursor_slice={"end_time": "2023-01-01"}),
352
+ "2020-01-01",
353
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2023-01-01"}),
354
+ [{cursor_field: "2021-01-01"}],
354
355
  {cursor_field: "2021-01-01"},
355
- {cursor_field: "2023-01-01"},
356
356
  ),
357
357
  (
358
- "test_close_slice_latest_record_cursor_value_is_highest",
358
+ "test_close_slice_latest_record_cursor_value_is_higher_than_slice_end",
359
359
  "2021-01-01",
360
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
361
- {cursor_field: "2023-01-01"},
362
- {cursor_field: "2023-01-01"},
360
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
361
+ [{cursor_field: "2023-01-01"}],
362
+ {cursor_field: "2021-01-01"},
363
363
  ),
364
364
  (
365
- "test_close_slice_without_latest_record",
365
+ "test_close_slice_with_no_records_observed",
366
366
  "2021-01-01",
367
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
367
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
368
+ [],
369
+ {cursor_field: "2021-01-01"},
370
+ ),
371
+ (
372
+ "test_close_slice_with_no_records_observed_and_no_previous_state",
368
373
  None,
374
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
375
+ [],
376
+ {},
377
+ ),
378
+ (
379
+ "test_close_slice_without_previous_cursor",
380
+ None,
381
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2023-01-01"}),
382
+ [{cursor_field: "2022-01-01"}],
369
383
  {cursor_field: "2022-01-01"},
370
384
  ),
371
385
  (
372
- "test_close_slice_without_cursor",
386
+ "test_close_slice_with_out_of_order_records",
387
+ "2021-01-01",
388
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
389
+ [{cursor_field: "2021-04-01"}, {cursor_field: "2021-02-01"}, {cursor_field: "2021-03-01"}],
390
+ {cursor_field: "2021-04-01"},
391
+ ),
392
+ (
393
+ "test_close_slice_with_some_records_out_of_slice_boundaries",
394
+ "2021-01-01",
395
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
396
+ [{cursor_field: "2021-02-01"}, {cursor_field: "2021-03-01"}, {cursor_field: "2023-01-01"}],
397
+ {cursor_field: "2021-03-01"},
398
+ ),
399
+ (
400
+ "test_close_slice_with_all_records_out_of_slice_boundaries",
401
+ "2021-01-01",
402
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
403
+ [{cursor_field: "2023-01-01"}],
404
+ {cursor_field: "2021-01-01"},
405
+ ),
406
+ (
407
+ "test_close_slice_with_all_records_out_of_slice_and_no_previous_cursor",
373
408
  None,
374
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
375
- {cursor_field: "2023-01-01"},
376
- {cursor_field: "2023-01-01"},
409
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
410
+ [{cursor_field: "2023-01-01"}],
411
+ {},
377
412
  ),
378
413
  ],
379
414
  )
380
- def test_close_slice(test_name, previous_cursor, stream_slice, latest_record_data, expected_state):
415
+ def test_close_slice(test_name, previous_cursor, stream_slice, observed_records, expected_state):
381
416
  cursor = DatetimeBasedCursor(
382
417
  start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
383
418
  cursor_field=InterpolatedString(string=cursor_field, parameters={}),
384
419
  datetime_format="%Y-%m-%d",
385
420
  config=config,
386
421
  parameters={},
422
+ partition_field_start="start_time",
423
+ partition_field_end="end_time",
387
424
  )
388
- cursor._cursor = previous_cursor
389
- cursor.close_slice(stream_slice, Record(latest_record_data, stream_slice) if latest_record_data else None)
425
+ cursor.set_initial_state({cursor_field: previous_cursor})
426
+ for record_data in observed_records:
427
+ record = Record(record_data, stream_slice)
428
+ cursor.observe(stream_slice, record)
429
+ last_record = observed_records[-1] if observed_records else None
430
+ cursor.close_slice(stream_slice, Record(last_record, stream_slice) if last_record else None)
390
431
  updated_state = cursor.get_stream_state()
391
432
  assert updated_state == expected_state
392
433
 
@@ -404,37 +445,42 @@ def test_close_slice_fails_if_slice_has_a_partition():
404
445
  cursor.close_slice(stream_slice, Record({"id": 1}, stream_slice))
405
446
 
406
447
 
407
- def test_given_different_format_and_slice_is_highest_when_close_slice_then_slice_datetime_format():
448
+ def test_compares_cursor_values_by_chronological_order():
408
449
  cursor = DatetimeBasedCursor(
409
450
  start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
410
451
  cursor_field=cursor_field,
411
- datetime_format="%Y-%m-%dT%H:%M:%S.%fZ",
412
- cursor_datetime_formats=["%Y-%m-%d"],
452
+ datetime_format="%d-%m-%Y",
413
453
  config=config,
414
454
  parameters={},
415
455
  )
416
456
 
417
- _slice = StreamSlice(partition={}, cursor_slice={"end_time": "2023-01-04T17:30:19.000Z"})
418
- record_cursor_value = "2023-01-03"
419
- cursor.close_slice(_slice, Record({cursor_field: record_cursor_value}, _slice))
457
+ _slice = StreamSlice(partition={}, cursor_slice={"start_time": "01-01-2023", "end_time": "01-04-2023"})
458
+ first_record = Record({cursor_field: "21-02-2023"}, _slice)
459
+ cursor.observe(_slice, first_record)
460
+ second_record = Record({cursor_field: "01-03-2023"}, _slice)
461
+ cursor.observe(_slice, second_record)
462
+ cursor.close_slice(_slice, second_record)
420
463
 
421
- assert cursor.get_stream_state()[cursor_field] == "2023-01-04T17:30:19.000Z"
464
+ assert cursor.get_stream_state()[cursor_field] == "01-03-2023"
422
465
 
423
466
 
424
- def test_given_partition_end_is_specified_and_greater_than_record_when_close_slice_then_use_partition_end():
425
- partition_field_end = "partition_field_end"
467
+ def test_given_different_format_and_slice_is_highest_when_close_slice_then_state_uses_record_format():
426
468
  cursor = DatetimeBasedCursor(
427
469
  start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
428
- cursor_field=InterpolatedString(string=cursor_field, parameters={}),
429
- datetime_format="%Y-%m-%d",
430
- partition_field_end=partition_field_end,
470
+ cursor_field=cursor_field,
471
+ datetime_format="%Y-%m-%dT%H:%M:%S.%fZ",
472
+ cursor_datetime_formats=["%Y-%m-%d"],
431
473
  config=config,
432
474
  parameters={},
433
475
  )
434
- stream_slice = StreamSlice(partition={}, cursor_slice={partition_field_end: "2025-01-01"})
435
- cursor.close_slice(stream_slice, Record({cursor_field: "2020-01-01"}, stream_slice))
436
- updated_state = cursor.get_stream_state()
437
- assert {cursor_field: "2025-01-01"} == updated_state
476
+
477
+ _slice = StreamSlice(partition={}, cursor_slice={"start_time": "2023-01-01T17:30:19.000Z", "end_time": "2023-01-04T17:30:19.000Z"})
478
+ record_cursor_value = "2023-01-03"
479
+ record = Record({cursor_field: record_cursor_value}, _slice)
480
+ cursor.observe(_slice, record)
481
+ cursor.close_slice(_slice, record)
482
+
483
+ assert cursor.get_stream_state()[cursor_field] == "2023-01-03"
438
484
 
439
485
 
440
486
  @pytest.mark.parametrize(
@@ -200,14 +200,14 @@ def test_given_record_for_partition_when_read_then_update_state():
200
200
  "states": [
201
201
  {
202
202
  "partition": {"partition_field": "1"},
203
- "cursor": {CURSOR_FIELD: "2022-01-31"},
203
+ "cursor": {CURSOR_FIELD: "2022-01-15"},
204
204
  }
205
205
  ]
206
206
  }
207
207
 
208
208
 
209
209
  def test_substream_without_input_state():
210
- source = ManifestDeclarativeSource(
210
+ test_source = ManifestDeclarativeSource(
211
211
  source_config=ManifestBuilder()
212
212
  .with_substream_partition_router("AnotherStream")
213
213
  .with_incremental_sync(
@@ -231,14 +231,14 @@ def test_substream_without_input_state():
231
231
  .build()
232
232
  )
233
233
 
234
- stream_instance = source.streams({})[1]
234
+ stream_instance = test_source.streams({})[1]
235
235
 
236
236
  stream_slice = StreamSlice(partition={"parent_id": "1"},
237
237
  cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"})
238
238
 
239
239
  with patch.object(
240
240
  SimpleRetriever, "_read_pages", side_effect=[[Record({"id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
241
- Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]
241
+ [Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]]
242
242
  ):
243
243
  slices = list(stream_instance.stream_slices(sync_mode=SYNC_MODE))
244
244
  assert list(slices) == [
@@ -246,6 +246,10 @@ def test_substream_without_input_state():
246
246
  cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"}),
247
247
  StreamSlice(partition={"parent_id": "1", "parent_slice": {}, },
248
248
  cursor_slice={"start_time": "2022-02-01", "end_time": "2022-02-28"}),
249
+ StreamSlice(partition={"parent_id": "2", "parent_slice": {}, },
250
+ cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"}),
251
+ StreamSlice(partition={"parent_id": "2", "parent_slice": {}, },
252
+ cursor_slice={"start_time": "2022-02-01", "end_time": "2022-02-28"}),
249
253
  ]
250
254
 
251
255
 
@@ -307,7 +311,7 @@ def test_substream_with_legacy_input_state():
307
311
  with patch.object(
308
312
  SimpleRetriever, "_read_pages", side_effect=[
309
313
  [Record({"id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
310
- [Record({"parent_id": "1"}, stream_slice)],
314
+ [Record({"parent_id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
311
315
  [Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
312
316
  [Record({"parent_id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]
313
317
  ]
@@ -319,7 +323,7 @@ def test_substream_with_legacy_input_state():
319
323
  expected_state = {"states": [
320
324
  {
321
325
  "cursor": {
322
- "cursor_field": "2022-01-31"
326
+ CURSOR_FIELD: "2022-01-15"
323
327
  },
324
328
  "partition": {"parent_id": "1", "parent_slice": {}}
325
329
  }
@@ -5,6 +5,7 @@
5
5
  # mypy: ignore-errors
6
6
 
7
7
  import datetime
8
+ from typing import Any, Mapping
8
9
 
9
10
  import pytest
10
11
  from airbyte_cdk.models import Level
@@ -27,6 +28,7 @@ from airbyte_cdk.sources.declarative.models import CheckStream as CheckStreamMod
27
28
  from airbyte_cdk.sources.declarative.models import CompositeErrorHandler as CompositeErrorHandlerModel
28
29
  from airbyte_cdk.sources.declarative.models import CustomErrorHandler as CustomErrorHandlerModel
29
30
  from airbyte_cdk.sources.declarative.models import CustomPartitionRouter as CustomPartitionRouterModel
31
+ from airbyte_cdk.sources.declarative.models import CustomSchemaLoader as CustomSchemaLoaderModel
30
32
  from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor as DatetimeBasedCursorModel
31
33
  from airbyte_cdk.sources.declarative.models import DeclarativeStream as DeclarativeStreamModel
32
34
  from airbyte_cdk.sources.declarative.models import DefaultPaginator as DefaultPaginatorModel
@@ -66,6 +68,7 @@ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
66
68
  from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
67
69
  from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, SimpleRetrieverTestReadDecorator
68
70
  from airbyte_cdk.sources.declarative.schema import JsonFileSchemaLoader
71
+ from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
69
72
  from airbyte_cdk.sources.declarative.spec import Spec
70
73
  from airbyte_cdk.sources.declarative.stream_slicers import CartesianProductStreamSlicer
71
74
  from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields
@@ -1820,3 +1823,19 @@ def test_create_offset_increment():
1820
1823
  assert strategy.page_size == expected_strategy.page_size
1821
1824
  assert strategy.inject_on_first_request == expected_strategy.inject_on_first_request
1822
1825
  assert strategy.config == input_config
1826
+
1827
+
1828
+ class MyCustomSchemaLoader(SchemaLoader):
1829
+ def get_json_schema(self) -> Mapping[str, Any]:
1830
+ """Returns a mapping describing the stream's schema"""
1831
+ return {}
1832
+
1833
+
1834
+ def test_create_custom_schema_loader():
1835
+
1836
+ definition = {
1837
+ "type": "CustomSchemaLoader",
1838
+ "class_name": "unit_tests.sources.declarative.parsers.test_model_to_component_factory.MyCustomSchemaLoader"
1839
+ }
1840
+ component = factory.create_component(CustomSchemaLoaderModel, definition, {})
1841
+ assert isinstance(component, MyCustomSchemaLoader)
@@ -477,6 +477,7 @@ def test_given_stream_data_is_not_record_when_read_records_then_update_slice_wit
477
477
  side_effect=retriever_read_pages,
478
478
  ):
479
479
  list(retriever.read_records(stream_slice=stream_slice, records_schema={}))
480
+ cursor.observe.assert_not_called()
480
481
  cursor.close_slice.assert_called_once_with(stream_slice, None)
481
482
 
482
483