airbyte-cdk 0.70.2__py3-none-any.whl → 0.72.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -602,6 +602,27 @@ definitions:
602
602
  $parameters:
603
603
  type: object
604
604
  additionalProperties: true
605
+ CustomSchemaLoader:
606
+ title: Custom Schema Loader
607
+ description: Schema Loader component whose behavior is derived from a custom code implementation of the connector.
608
+ type: object
609
+ additionalProperties: true
610
+ required:
611
+ - type
612
+ - class_name
613
+ properties:
614
+ type:
615
+ type: string
616
+ enum: [CustomSchemaLoader]
617
+ class_name:
618
+ title: Class Name
619
+ description: Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_<name>.<package>.<class_name>`.
620
+ type: string
621
+ examples:
622
+ - "source_railz.components.MyCustomSchemaLoader"
623
+ $parameters:
624
+ type: object
625
+ additionalProperties: true
605
626
  CustomTransformation:
606
627
  title: Custom Transformation
607
628
  description: Transformation component whose behavior is derived from a custom code implementation of the connector.
@@ -948,6 +969,7 @@ definitions:
948
969
  anyOf:
949
970
  - "$ref": "#/definitions/InlineSchemaLoader"
950
971
  - "$ref": "#/definitions/JsonFileSchemaLoader"
972
+ - "$ref": "#/definitions/CustomSchemaLoader"
951
973
  # TODO we have moved the transformation to the RecordSelector level in the code but kept this here for
952
974
  # compatibility reasons. We should eventually move this to align with the code.
953
975
  transformations:
@@ -24,18 +24,28 @@ class Cursor(ABC, StreamSlicer):
24
24
  :param stream_state: The state of the stream as returned by get_stream_state
25
25
  """
26
26
 
27
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
28
+ """
29
+ Register a record with the cursor; the cursor instance can then use it to manage the state of the in-progress stream read.
30
+
31
+ :param stream_slice: The current slice, which may or may not contain the most recently observed record
32
+ :param record: the most recently-read record, which the cursor can use to update the stream state. Outwardly-visible changes to the
33
+ stream state may need to be deferred depending on whether the source reliably orders records by the cursor field.
34
+ """
35
+ pass
36
+
27
37
  @abstractmethod
28
38
  def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
29
39
  """
30
40
  Update state based on the stream slice and the latest record. Note that `stream_slice.cursor_slice` and
31
- `last_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be leveraged to
41
+ `most_recent_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be leveraged to
32
42
  update the state.
33
43
 
34
44
  :param stream_slice: slice to close
35
- :param last_record: the latest record we have received for the slice. This is important to consider because even if the cursor emits
36
- a slice, some APIs are not able to enforce the upper boundary. The outcome is that the last_record might have a higher cursor
37
- value than the slice upper boundary and if we want to reduce the duplication as much as possible, we need to consider the highest
38
- value between the internal cursor, the stream slice upper boundary and the record cursor value.
45
+ :param most_recent_record: the latest record we have received for the slice. This is important to consider because even if the
46
+ cursor emits a slice, some APIs are not able to enforce the upper boundary. The outcome is that the most_recent_record might have a
47
+ higher cursor value than the slice upper boundary and if we want to reduce the duplication as much as possible, we need to
48
+ consider the highest value between the internal cursor, the stream slice upper boundary and the record cursor value.
39
49
  """
40
50
 
41
51
  @abstractmethod
@@ -52,7 +52,12 @@ class DatetimeBasedCursor(Cursor):
52
52
  datetime_format: str
53
53
  config: Config
54
54
  parameters: InitVar[Mapping[str, Any]]
55
- _cursor: Optional[str] = field(repr=False, default=None) # tracks current datetime
55
+ _highest_observed_cursor_field_value: Optional[str] = field(
56
+ repr=False, default=None
57
+ ) # tracks the latest observed datetime, which may not be safe to emit in the case of out-of-order records
58
+ _cursor: Optional[str] = field(
59
+ repr=False, default=None
60
+ ) # tracks the latest observed datetime that is appropriate to emit as stream state
56
61
  end_datetime: Optional[Union[MinMaxDatetime, str]] = None
57
62
  step: Optional[Union[InterpolatedString, str]] = None
58
63
  cursor_granularity: Optional[str] = None
@@ -109,20 +114,39 @@ class DatetimeBasedCursor(Cursor):
109
114
  """
110
115
  self._cursor = stream_state.get(self._cursor_field.eval(self.config)) if stream_state else None
111
116
 
112
- def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
117
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
118
+ """
119
+ Register a record with the cursor; the cursor instance can then use it to manage the state of the in-progress stream read.
120
+
121
+ :param stream_slice: The current slice, which may or may not contain the most recently observed record
122
+ :param record: the most recently-read record, which the cursor can use to update the stream state. Outwardly-visible changes to the
123
+ stream state may need to be deferred depending on whether the source reliably orders records by the cursor field.
124
+ """
125
+ record_cursor_value = record.get(self._cursor_field.eval(self.config))
126
+ # if the current record has no cursor value, we cannot meaningfully update the state based on it, so there is nothing more to do
127
+ if not record_cursor_value:
128
+ return
129
+
130
+ start_field = self._partition_field_start.eval(self.config)
131
+ end_field = self._partition_field_end.eval(self.config)
132
+ is_highest_observed_cursor_value = not self._highest_observed_cursor_field_value or self.parse_date(
133
+ record_cursor_value
134
+ ) > self.parse_date(self._highest_observed_cursor_field_value)
135
+ if (
136
+ self._is_within_daterange_boundaries(record, stream_slice.get(start_field), stream_slice.get(end_field)) # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date
137
+ and is_highest_observed_cursor_value
138
+ ):
139
+ self._highest_observed_cursor_field_value = record_cursor_value
140
+
141
+ def close_slice(self, stream_slice: StreamSlice, _most_recent_record: Optional[Record]) -> None:
113
142
  if stream_slice.partition:
114
143
  raise ValueError(f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}.")
115
- last_record_cursor_value = most_recent_record.get(self._cursor_field.eval(self.config)) if most_recent_record else None
116
- stream_slice_value_end = stream_slice.get(self._partition_field_end.eval(self.config))
117
- potential_cursor_values = [
118
- cursor_value for cursor_value in [self._cursor, last_record_cursor_value, stream_slice_value_end] if cursor_value
119
- ]
120
144
  cursor_value_str_by_cursor_value_datetime = dict(
121
145
  map(
122
146
  # we need to ensure the cursor value is preserved as is in the state else the CATs might complain of something like
123
147
  # 2023-01-04T17:30:19.000Z' <= '2023-01-04T17:30:19.000000Z'
124
- lambda datetime_str: (self.parse_date(datetime_str), datetime_str),
125
- potential_cursor_values,
148
+ lambda datetime_str: (self.parse_date(datetime_str), datetime_str), # type: ignore # because of the filter on the next line, this will only be called with a str
149
+ filter(lambda item: item, [self._cursor, self._highest_observed_cursor_field_value]),
126
150
  )
127
151
  )
128
152
  self._cursor = (
@@ -279,10 +303,26 @@ class DatetimeBasedCursor(Cursor):
279
303
  f"Could not find cursor field `{cursor_field}` in record. The incremental sync will assume it needs to be synced",
280
304
  )
281
305
  return True
282
-
283
306
  latest_possible_cursor_value = self._select_best_end_datetime()
284
307
  earliest_possible_cursor_value = self._calculate_earliest_possible_value(latest_possible_cursor_value)
285
- return earliest_possible_cursor_value <= self.parse_date(record_cursor_value) <= latest_possible_cursor_value
308
+ return self._is_within_daterange_boundaries(record, earliest_possible_cursor_value, latest_possible_cursor_value)
309
+
310
+ def _is_within_daterange_boundaries(
311
+ self, record: Record, start_datetime_boundary: Union[datetime.datetime, str], end_datetime_boundary: Union[datetime.datetime, str]
312
+ ) -> bool:
313
+ cursor_field = self._cursor_field.eval(self.config)
314
+ record_cursor_value = record.get(cursor_field)
315
+ if not record_cursor_value:
316
+ self._send_log(
317
+ Level.WARN,
318
+ f"Could not find cursor field `{cursor_field}` in record. The record will not be considered when emitting sync state",
319
+ )
320
+ return False
321
+ if isinstance(start_datetime_boundary, str):
322
+ start_datetime_boundary = self.parse_date(start_datetime_boundary)
323
+ if isinstance(end_datetime_boundary, str):
324
+ end_datetime_boundary = self.parse_date(end_datetime_boundary)
325
+ return start_datetime_boundary <= self.parse_date(record_cursor_value) <= end_datetime_boundary
286
326
 
287
327
  def _send_log(self, level: Level, message: str) -> None:
288
328
  if self.message_repository:
@@ -86,6 +86,11 @@ class PerPartitionCursor(Cursor):
86
86
  for state in stream_state["states"]:
87
87
  self._cursor_per_partition[self._to_partition_key(state["partition"])] = self._create_cursor(state["cursor"])
88
88
 
89
+ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
90
+ self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].observe(
91
+ StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record
92
+ )
93
+
89
94
  def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
90
95
  try:
91
96
  cursor_most_recent_record = (
@@ -208,6 +208,20 @@ class CustomPartitionRouter(BaseModel):
208
208
  parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters')
209
209
 
210
210
 
211
+ class CustomSchemaLoader(BaseModel):
212
+ class Config:
213
+ extra = Extra.allow
214
+
215
+ type: Literal['CustomSchemaLoader']
216
+ class_name: str = Field(
217
+ ...,
218
+ description='Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_<name>.<package>.<class_name>`.',
219
+ examples=['source_railz.components.MyCustomSchemaLoader'],
220
+ title='Class Name',
221
+ )
222
+ parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters')
223
+
224
+
211
225
  class CustomTransformation(BaseModel):
212
226
  class Config:
213
227
  extra = Extra.allow
@@ -1161,7 +1175,9 @@ class DeclarativeStream(BaseModel):
1161
1175
  primary_key: Optional[PrimaryKey] = Field(
1162
1176
  '', description='The primary key of the stream.', title='Primary Key'
1163
1177
  )
1164
- schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader]] = Field(
1178
+ schema_loader: Optional[
1179
+ Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]
1180
+ ] = Field(
1165
1181
  None,
1166
1182
  description='Component used to retrieve the schema for the current stream.',
1167
1183
  title='Schema Loader',
@@ -49,6 +49,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
49
49
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRecordFilter as CustomRecordFilterModel
50
50
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRequester as CustomRequesterModel
51
51
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRetriever as CustomRetrieverModel
52
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomSchemaLoader as CustomSchemaLoader
52
53
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomTransformation as CustomTransformationModel
53
54
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import DatetimeBasedCursor as DatetimeBasedCursorModel
54
55
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import DeclarativeStream as DeclarativeStreamModel
@@ -165,6 +166,7 @@ class ModelToComponentFactory:
165
166
  CustomRecordFilterModel: self.create_custom_component,
166
167
  CustomRequesterModel: self.create_custom_component,
167
168
  CustomRetrieverModel: self.create_custom_component,
169
+ CustomSchemaLoader: self.create_custom_component,
168
170
  CustomPaginationStrategyModel: self.create_custom_component,
169
171
  CustomPartitionRouterModel: self.create_custom_component,
170
172
  CustomTransformationModel: self.create_custom_component,
@@ -322,7 +322,13 @@ class SimpleRetriever(Retriever):
322
322
  records_schema=records_schema,
323
323
  )
324
324
  for stream_data in self._read_pages(record_generator, self.state, _slice):
325
- most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, stream_data, _slice)
325
+ current_record = self._extract_record(stream_data, _slice)
326
+ if self.cursor and current_record:
327
+ self.cursor.observe(_slice, current_record)
328
+
329
+ # TODO this is just the most recent record *read*, not necessarily the most recent record *within slice boundaries*; once all
330
+ # cursors implement a meaningful `observe` method, it can be removed, both from here and the `Cursor.close_slice` method args
331
+ most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, current_record, _slice)
326
332
  yield stream_data
327
333
 
328
334
  if self.cursor:
@@ -330,13 +336,13 @@ class SimpleRetriever(Retriever):
330
336
  return
331
337
 
332
338
  def _get_most_recent_record(
333
- self, current_most_recent: Optional[Record], stream_data: StreamData, stream_slice: StreamSlice
339
+ self, current_most_recent: Optional[Record], current_record: Optional[Record], stream_slice: StreamSlice
334
340
  ) -> Optional[Record]:
335
- if self.cursor and (record := self._extract_record(stream_data, stream_slice)):
341
+ if self.cursor and current_record:
336
342
  if not current_most_recent:
337
- return record
343
+ return current_record
338
344
  else:
339
- return current_most_recent if self.cursor.is_greater_than_or_equal(current_most_recent, record) else record
345
+ return current_most_recent if self.cursor.is_greater_than_or_equal(current_most_recent, current_record) else current_record
340
346
  else:
341
347
  return None
342
348
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.70.2
3
+ Version: 0.72.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -38,7 +38,7 @@ airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py
38
38
  airbyte_cdk/sources/concurrent_source/thread_pool_manager.py,sha256=hFj5rsRtORurl3fwH8GC9h6Uz2wbzBFOLWUxJ-YJ7J8,4801
39
39
  airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
40
40
  airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
41
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=An4UXx4e_GodeVd0bSQTv1G_Z1yjUcb7NbOmcC9-i9I,89327
41
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=netrMub3A9k9wk5VWx8vqDWhfeLk_sviHHJ8NXnH2OA,90111
42
42
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
43
43
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=9nBjSBilzH2aeJsUEqOLyc4G2RRjlPZCapHDMv4jnOU,6691
44
44
  airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
@@ -67,9 +67,9 @@ airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=-p9X6UV3iS
67
67
  airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=kH5DrBHr6DdpmGqWx4aFRXkprL-VGEHI5BcG3A-0Cjg,1394
68
68
  airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=m3IzOp_wo6QnQXQ3bpxROmHA0P_YeuPDIpBlWvyBXq0,4366
69
69
  airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=BYzVRQ4MSmhLCajgTi1Y_FHlwCBTdsMDT6zRmYMytws,425
70
- airbyte_cdk/sources/declarative/incremental/cursor.py,sha256=cGAYP-Std-_MNsX4KGIP4FwDff6WdECV7CMgMi1uKSg,2890
71
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=B-4CmFwv6zXDHYJ2NFe-Ct-n360YMNSZ4ruxGe8cuSg,15802
72
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=8Oq6pgbQjAlkQCPAgOoa-fK9Yb6iyx1oYqdsaMjamT0,12241
70
+ airbyte_cdk/sources/declarative/incremental/cursor.py,sha256=KgKGGgVY_JJo4JHRafo5__61Xu3hVfTvDKoSSM6AmTA,3523
71
+ airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=uLFSm5K8oBbbQXDrAYDHY_B55Nzwv27m4Qvgegqx5GM,18384
72
+ airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=pXVty1WnW2YJ013yoApYgNBdEat47XjgKCMhLS7RkEg,12504
73
73
  airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
74
74
  airbyte_cdk/sources/declarative/interpolation/filters.py,sha256=V5XL-IEFNn08YdkJl4A54-G73qJ8P7WAQOYgf1-fXzQ,2809
75
75
  airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py,sha256=p5XbZB1cvP_U6HBBHX4PIFlXMHB9vdhSeZ5N3N8AuBY,1835
@@ -80,14 +80,14 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
80
80
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=hOLBs9VaaE5xsT2wY2VxSrISE165bu_Egb83ordG4XI,5379
81
81
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
82
82
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
83
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=u6i4nYufXZe27bIED7uhsvfhghOMN4NdWoGLEeTjzwk,61032
83
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=1a67m2fGAdBRf7rOzvk5SIluQHIWL4SPLnrjsnrnm_s,61574
84
84
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
85
85
  airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=5vOvMuyWlpALrOq2ehLxa7wO6tlFIlgUNtMYrMCKIjE,6092
86
86
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
87
87
  airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py,sha256=W8BcK4KOg4ifNXgsdeIoV4oneHjXBKcPHEZHIC4r-hM,3801
88
88
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=i2yUdrdlPUHI0dSQX0zBT8WSg912SMiCF8qkQ8VvlA4,8287
89
89
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=6ukHx0bBrCJm9rek1l_MEfS3U_gdJcM4pJRyifJEOp0,6412
90
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=_jX1sPW5SA51nHoxpoVktC3jdO8PR4cGrQfZftcVSLo,59316
90
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=1zI1Mk9k_3p_TMz6LsgV7U54CJ6etl88q8WOv4AZO-w,59499
91
91
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=27sOWhw2LBQs62HchURakHQ2M_mtnOatNgU6q8RUtpU,476
92
92
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=L22D-up7W2HahZZo2dA-IbRSs7qJEahU6O6bU0eiIt8,4324
93
93
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=cl-TQdu_m6_IF20gdD1jll0SpejIyMZHvyGXx2NafuI,1611
@@ -128,7 +128,7 @@ airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_
128
128
  airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py,sha256=mCdh3UoAZoLycm--JfWDxXcjMKI2j6bFkRZRdOz67xc,2602
129
129
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=IiHXDeKtibRqeWcRUckmSiXfk--u-sFMw3APWK8PCGQ,339
130
130
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=h3wI68k9NxYE39jMZPOzL72XYTcScFvVeev-DZ_nPoo,1753
131
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=1r-xK2D-yZPNpOKCaU9z500r5T96jWV7io-sjmh3QkQ,19204
131
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=yHciSsy1CK-NyfWRO4B_ySLF35itv6tkrCBaL_ZSAO4,19650
132
132
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
133
133
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=t0ll098cIG2Wr1rq1rZ3QDZ9WnScUuqAh42YVoTRWrU,1794
134
134
  airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
@@ -315,9 +315,9 @@ unit_tests/sources/declarative/extractors/test_dpath_extractor.py,sha256=-bgWKAi
315
315
  unit_tests/sources/declarative/extractors/test_record_filter.py,sha256=mcR6Zc3BoVDm_hkmx3J3zFShi2CdudqxR2U1JRxkgzA,2329
316
316
  unit_tests/sources/declarative/extractors/test_record_selector.py,sha256=06gLLRwom45YtdsKm9OUabpkioXSDKEnv0DsDTvItC4,6884
317
317
  unit_tests/sources/declarative/incremental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
318
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py,sha256=cvgBYxAJM-_w7ABOL66vZVPr7PBHtt7YGCALIeq9I9Q,36375
318
+ unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py,sha256=R-QX1npdwzGShcxY3_Zrbm1jP0sxyZ8AcOe2O-vtWdo,38486
319
319
  unit_tests/sources/declarative/incremental/test_per_partition_cursor.py,sha256=Xj3vYxB2kbhOZkJ9p2MXuOLq0FwU17UoxM1vuRaN1_k,20402
320
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py,sha256=S4Vg2qf7RVRRxw5DkBjSOOFc_LCKUs7fZ8Qtf4WGB08,12519
320
+ unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py,sha256=SeEb7jfXRPO7bbQVtm1eeo-JOR4PQXDkWbOA3fc89Kw,12897
321
321
  unit_tests/sources/declarative/interpolation/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
322
322
  unit_tests/sources/declarative/interpolation/test_filters.py,sha256=gPGDNPeLu87rj1WcVVpLsosh6Dzgh0ihsyDkEIdGI_E,2388
323
323
  unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py,sha256=vr45nStygl09eNsFYN_YiaqcLBWse9OW_wMc7orHoHU,1804
@@ -329,7 +329,7 @@ unit_tests/sources/declarative/interpolation/test_macros.py,sha256=vEZmHQ0KsfQUz
329
329
  unit_tests/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
330
330
  unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py,sha256=egePHWYcXprfPtoHhiquWAXuJkDr-DB_RakKhdyaoHs,14316
331
331
  unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py,sha256=K3q9eyx-sJFQ8nGYjAgS7fxau4sX_FlNreEAjiCYOeE,5306
332
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py,sha256=WIUygDJvNGEIzNdlarkYymTTgRtiXlR6IOjwnsqlC3E,75683
332
+ unit_tests/sources/declarative/parsers/test_model_to_component_factory.py,sha256=cDlDoNutC6JMGdyvkYMteiHtVrpQ_cKnRE_yn6dWui0,76426
333
333
  unit_tests/sources/declarative/parsers/testing_components.py,sha256=_yUijmYRM-yYHPGDB2JsfEiOuVrgexGW9QwHf1xxNW8,1326
334
334
  unit_tests/sources/declarative/partition_routers/__init__.py,sha256=O8MZg4Bv_DghdRy9BoJCPIqdV75VtiUrhEkExQgb2nE,61
335
335
  unit_tests/sources/declarative/partition_routers/test_list_partition_router.py,sha256=WKdbAQCHfCVOyoAFM_kbHsbqAF_e5FX5Zvou5ARsJZ4,6572
@@ -360,7 +360,7 @@ unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py,sha2
360
360
  unit_tests/sources/declarative/requesters/request_options/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
361
361
  unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py,sha256=bjcaTb8I37tBhs5b_FLRTLkDZAmKjGRywpcN4oGl-zI,5900
362
362
  unit_tests/sources/declarative/retrievers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
363
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py,sha256=mDF2m-2hqTr7VkrdoIffE8vMaUU9sv5D4At0DVtgEJw,20273
363
+ unit_tests/sources/declarative/retrievers/test_simple_retriever.py,sha256=TLd9k1GsV0kjl2rVvObUzoKWFcYW0ILBvOOJVTzqxZ4,20316
364
364
  unit_tests/sources/declarative/schema/__init__.py,sha256=i-iWyCqXPVgY-4miy16FH8U06gW_1_49AVq_8S8rVWY,134
365
365
  unit_tests/sources/declarative/schema/test_default_schema_loader.py,sha256=cWOFJnT9fhcEU6XLHkoe3E83mCjWc8lEttT0PFcvAm8,1091
366
366
  unit_tests/sources/declarative/schema/test_inline_schema_loader.py,sha256=vDJauhZ8og8M9ZqKDbf12SSYSfhUZ0_LmH7zjJHCHwI,517
@@ -459,8 +459,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
459
459
  unit_tests/utils/test_secret_utils.py,sha256=CdKK8A2-5XVxbXVtX22FK9dwwMeP5KNqDH6luWRXSNw,5256
460
460
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
461
461
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
462
- airbyte_cdk-0.70.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
463
- airbyte_cdk-0.70.2.dist-info/METADATA,sha256=2B0x5Y9M3ZyRPwYmXjynUjYCeFnTldJIPQCAcAuqYMs,11074
464
- airbyte_cdk-0.70.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
465
- airbyte_cdk-0.70.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
466
- airbyte_cdk-0.70.2.dist-info/RECORD,,
462
+ airbyte_cdk-0.72.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
463
+ airbyte_cdk-0.72.0.dist-info/METADATA,sha256=lDN4hbkJHUsXxycTKKTDAgCzzo72JYBIzlCOZzuU0nM,11074
464
+ airbyte_cdk-0.72.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
465
+ airbyte_cdk-0.72.0.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
466
+ airbyte_cdk-0.72.0.dist-info/RECORD,,
@@ -338,55 +338,96 @@ def test_stream_slices(
338
338
 
339
339
 
340
340
  @pytest.mark.parametrize(
341
- "test_name, previous_cursor, stream_slice, latest_record_data, expected_state",
341
+ "test_name, previous_cursor, stream_slice, observed_records, expected_state",
342
342
  [
343
343
  (
344
344
  "test_close_slice_previous_cursor_is_highest",
345
345
  "2023-01-01",
346
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
347
- {cursor_field: "2021-01-01"},
346
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
347
+ [{cursor_field: "2021-01-01"}],
348
348
  {cursor_field: "2023-01-01"},
349
349
  ),
350
350
  (
351
351
  "test_close_slice_stream_slice_partition_end_is_highest",
352
- "2021-01-01",
353
- StreamSlice(partition={}, cursor_slice={"end_time": "2023-01-01"}),
352
+ "2020-01-01",
353
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2023-01-01"}),
354
+ [{cursor_field: "2021-01-01"}],
354
355
  {cursor_field: "2021-01-01"},
355
- {cursor_field: "2023-01-01"},
356
356
  ),
357
357
  (
358
- "test_close_slice_latest_record_cursor_value_is_highest",
358
+ "test_close_slice_latest_record_cursor_value_is_higher_than_slice_end",
359
359
  "2021-01-01",
360
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
361
- {cursor_field: "2023-01-01"},
362
- {cursor_field: "2023-01-01"},
360
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
361
+ [{cursor_field: "2023-01-01"}],
362
+ {cursor_field: "2021-01-01"},
363
363
  ),
364
364
  (
365
- "test_close_slice_without_latest_record",
365
+ "test_close_slice_with_no_records_observed",
366
366
  "2021-01-01",
367
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
367
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
368
+ [],
369
+ {cursor_field: "2021-01-01"},
370
+ ),
371
+ (
372
+ "test_close_slice_with_no_records_observed_and_no_previous_state",
368
373
  None,
374
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
375
+ [],
376
+ {},
377
+ ),
378
+ (
379
+ "test_close_slice_without_previous_cursor",
380
+ None,
381
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2023-01-01"}),
382
+ [{cursor_field: "2022-01-01"}],
369
383
  {cursor_field: "2022-01-01"},
370
384
  ),
371
385
  (
372
- "test_close_slice_without_cursor",
386
+ "test_close_slice_with_out_of_order_records",
387
+ "2021-01-01",
388
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
389
+ [{cursor_field: "2021-04-01"}, {cursor_field: "2021-02-01"}, {cursor_field: "2021-03-01"}],
390
+ {cursor_field: "2021-04-01"},
391
+ ),
392
+ (
393
+ "test_close_slice_with_some_records_out_of_slice_boundaries",
394
+ "2021-01-01",
395
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
396
+ [{cursor_field: "2021-02-01"}, {cursor_field: "2021-03-01"}, {cursor_field: "2023-01-01"}],
397
+ {cursor_field: "2021-03-01"},
398
+ ),
399
+ (
400
+ "test_close_slice_with_all_records_out_of_slice_boundaries",
401
+ "2021-01-01",
402
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
403
+ [{cursor_field: "2023-01-01"}],
404
+ {cursor_field: "2021-01-01"},
405
+ ),
406
+ (
407
+ "test_close_slice_with_all_records_out_of_slice_and_no_previous_cursor",
373
408
  None,
374
- StreamSlice(partition={}, cursor_slice={"end_time": "2022-01-01"}),
375
- {cursor_field: "2023-01-01"},
376
- {cursor_field: "2023-01-01"},
409
+ StreamSlice(partition={}, cursor_slice={"start_time": "2021-01-01", "end_time": "2022-01-01"}),
410
+ [{cursor_field: "2023-01-01"}],
411
+ {},
377
412
  ),
378
413
  ],
379
414
  )
380
- def test_close_slice(test_name, previous_cursor, stream_slice, latest_record_data, expected_state):
415
+ def test_close_slice(test_name, previous_cursor, stream_slice, observed_records, expected_state):
381
416
  cursor = DatetimeBasedCursor(
382
417
  start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
383
418
  cursor_field=InterpolatedString(string=cursor_field, parameters={}),
384
419
  datetime_format="%Y-%m-%d",
385
420
  config=config,
386
421
  parameters={},
422
+ partition_field_start="start_time",
423
+ partition_field_end="end_time",
387
424
  )
388
- cursor._cursor = previous_cursor
389
- cursor.close_slice(stream_slice, Record(latest_record_data, stream_slice) if latest_record_data else None)
425
+ cursor.set_initial_state({cursor_field: previous_cursor})
426
+ for record_data in observed_records:
427
+ record = Record(record_data, stream_slice)
428
+ cursor.observe(stream_slice, record)
429
+ last_record = observed_records[-1] if observed_records else None
430
+ cursor.close_slice(stream_slice, Record(last_record, stream_slice) if last_record else None)
390
431
  updated_state = cursor.get_stream_state()
391
432
  assert updated_state == expected_state
392
433
 
@@ -404,37 +445,42 @@ def test_close_slice_fails_if_slice_has_a_partition():
404
445
  cursor.close_slice(stream_slice, Record({"id": 1}, stream_slice))
405
446
 
406
447
 
407
- def test_given_different_format_and_slice_is_highest_when_close_slice_then_slice_datetime_format():
448
+ def test_compares_cursor_values_by_chronological_order():
408
449
  cursor = DatetimeBasedCursor(
409
450
  start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
410
451
  cursor_field=cursor_field,
411
- datetime_format="%Y-%m-%dT%H:%M:%S.%fZ",
412
- cursor_datetime_formats=["%Y-%m-%d"],
452
+ datetime_format="%d-%m-%Y",
413
453
  config=config,
414
454
  parameters={},
415
455
  )
416
456
 
417
- _slice = StreamSlice(partition={}, cursor_slice={"end_time": "2023-01-04T17:30:19.000Z"})
418
- record_cursor_value = "2023-01-03"
419
- cursor.close_slice(_slice, Record({cursor_field: record_cursor_value}, _slice))
457
+ _slice = StreamSlice(partition={}, cursor_slice={"start_time": "01-01-2023", "end_time": "01-04-2023"})
458
+ first_record = Record({cursor_field: "21-02-2023"}, _slice)
459
+ cursor.observe(_slice, first_record)
460
+ second_record = Record({cursor_field: "01-03-2023"}, _slice)
461
+ cursor.observe(_slice, second_record)
462
+ cursor.close_slice(_slice, second_record)
420
463
 
421
- assert cursor.get_stream_state()[cursor_field] == "2023-01-04T17:30:19.000Z"
464
+ assert cursor.get_stream_state()[cursor_field] == "01-03-2023"
422
465
 
423
466
 
424
- def test_given_partition_end_is_specified_and_greater_than_record_when_close_slice_then_use_partition_end():
425
- partition_field_end = "partition_field_end"
467
+ def test_given_different_format_and_slice_is_highest_when_close_slice_then_state_uses_record_format():
426
468
  cursor = DatetimeBasedCursor(
427
469
  start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", parameters={}),
428
- cursor_field=InterpolatedString(string=cursor_field, parameters={}),
429
- datetime_format="%Y-%m-%d",
430
- partition_field_end=partition_field_end,
470
+ cursor_field=cursor_field,
471
+ datetime_format="%Y-%m-%dT%H:%M:%S.%fZ",
472
+ cursor_datetime_formats=["%Y-%m-%d"],
431
473
  config=config,
432
474
  parameters={},
433
475
  )
434
- stream_slice = StreamSlice(partition={}, cursor_slice={partition_field_end: "2025-01-01"})
435
- cursor.close_slice(stream_slice, Record({cursor_field: "2020-01-01"}, stream_slice))
436
- updated_state = cursor.get_stream_state()
437
- assert {cursor_field: "2025-01-01"} == updated_state
476
+
477
+ _slice = StreamSlice(partition={}, cursor_slice={"start_time": "2023-01-01T17:30:19.000Z", "end_time": "2023-01-04T17:30:19.000Z"})
478
+ record_cursor_value = "2023-01-03"
479
+ record = Record({cursor_field: record_cursor_value}, _slice)
480
+ cursor.observe(_slice, record)
481
+ cursor.close_slice(_slice, record)
482
+
483
+ assert cursor.get_stream_state()[cursor_field] == "2023-01-03"
438
484
 
439
485
 
440
486
  @pytest.mark.parametrize(
@@ -200,14 +200,14 @@ def test_given_record_for_partition_when_read_then_update_state():
200
200
  "states": [
201
201
  {
202
202
  "partition": {"partition_field": "1"},
203
- "cursor": {CURSOR_FIELD: "2022-01-31"},
203
+ "cursor": {CURSOR_FIELD: "2022-01-15"},
204
204
  }
205
205
  ]
206
206
  }
207
207
 
208
208
 
209
209
  def test_substream_without_input_state():
210
- source = ManifestDeclarativeSource(
210
+ test_source = ManifestDeclarativeSource(
211
211
  source_config=ManifestBuilder()
212
212
  .with_substream_partition_router("AnotherStream")
213
213
  .with_incremental_sync(
@@ -231,14 +231,14 @@ def test_substream_without_input_state():
231
231
  .build()
232
232
  )
233
233
 
234
- stream_instance = source.streams({})[1]
234
+ stream_instance = test_source.streams({})[1]
235
235
 
236
236
  stream_slice = StreamSlice(partition={"parent_id": "1"},
237
237
  cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"})
238
238
 
239
239
  with patch.object(
240
240
  SimpleRetriever, "_read_pages", side_effect=[[Record({"id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
241
- Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]
241
+ [Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]]
242
242
  ):
243
243
  slices = list(stream_instance.stream_slices(sync_mode=SYNC_MODE))
244
244
  assert list(slices) == [
@@ -246,6 +246,10 @@ def test_substream_without_input_state():
246
246
  cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"}),
247
247
  StreamSlice(partition={"parent_id": "1", "parent_slice": {}, },
248
248
  cursor_slice={"start_time": "2022-02-01", "end_time": "2022-02-28"}),
249
+ StreamSlice(partition={"parent_id": "2", "parent_slice": {}, },
250
+ cursor_slice={"start_time": "2022-01-01", "end_time": "2022-01-31"}),
251
+ StreamSlice(partition={"parent_id": "2", "parent_slice": {}, },
252
+ cursor_slice={"start_time": "2022-02-01", "end_time": "2022-02-28"}),
249
253
  ]
250
254
 
251
255
 
@@ -307,7 +311,7 @@ def test_substream_with_legacy_input_state():
307
311
  with patch.object(
308
312
  SimpleRetriever, "_read_pages", side_effect=[
309
313
  [Record({"id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
310
- [Record({"parent_id": "1"}, stream_slice)],
314
+ [Record({"parent_id": "1", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
311
315
  [Record({"id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)],
312
316
  [Record({"parent_id": "2", CURSOR_FIELD: "2022-01-15"}, stream_slice)]
313
317
  ]
@@ -319,7 +323,7 @@ def test_substream_with_legacy_input_state():
319
323
  expected_state = {"states": [
320
324
  {
321
325
  "cursor": {
322
- "cursor_field": "2022-01-31"
326
+ CURSOR_FIELD: "2022-01-15"
323
327
  },
324
328
  "partition": {"parent_id": "1", "parent_slice": {}}
325
329
  }
@@ -5,6 +5,7 @@
5
5
  # mypy: ignore-errors
6
6
 
7
7
  import datetime
8
+ from typing import Any, Mapping
8
9
 
9
10
  import pytest
10
11
  from airbyte_cdk.models import Level
@@ -27,6 +28,7 @@ from airbyte_cdk.sources.declarative.models import CheckStream as CheckStreamMod
27
28
  from airbyte_cdk.sources.declarative.models import CompositeErrorHandler as CompositeErrorHandlerModel
28
29
  from airbyte_cdk.sources.declarative.models import CustomErrorHandler as CustomErrorHandlerModel
29
30
  from airbyte_cdk.sources.declarative.models import CustomPartitionRouter as CustomPartitionRouterModel
31
+ from airbyte_cdk.sources.declarative.models import CustomSchemaLoader as CustomSchemaLoaderModel
30
32
  from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor as DatetimeBasedCursorModel
31
33
  from airbyte_cdk.sources.declarative.models import DeclarativeStream as DeclarativeStreamModel
32
34
  from airbyte_cdk.sources.declarative.models import DefaultPaginator as DefaultPaginatorModel
@@ -66,6 +68,7 @@ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
66
68
  from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
67
69
  from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, SimpleRetrieverTestReadDecorator
68
70
  from airbyte_cdk.sources.declarative.schema import JsonFileSchemaLoader
71
+ from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
69
72
  from airbyte_cdk.sources.declarative.spec import Spec
70
73
  from airbyte_cdk.sources.declarative.stream_slicers import CartesianProductStreamSlicer
71
74
  from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields
@@ -1820,3 +1823,19 @@ def test_create_offset_increment():
1820
1823
  assert strategy.page_size == expected_strategy.page_size
1821
1824
  assert strategy.inject_on_first_request == expected_strategy.inject_on_first_request
1822
1825
  assert strategy.config == input_config
1826
+
1827
+
1828
+ class MyCustomSchemaLoader(SchemaLoader):
1829
+ def get_json_schema(self) -> Mapping[str, Any]:
1830
+ """Returns a mapping describing the stream's schema"""
1831
+ return {}
1832
+
1833
+
1834
+ def test_create_custom_schema_loader():
1835
+
1836
+ definition = {
1837
+ "type": "CustomSchemaLoader",
1838
+ "class_name": "unit_tests.sources.declarative.parsers.test_model_to_component_factory.MyCustomSchemaLoader"
1839
+ }
1840
+ component = factory.create_component(CustomSchemaLoaderModel, definition, {})
1841
+ assert isinstance(component, MyCustomSchemaLoader)
@@ -477,6 +477,7 @@ def test_given_stream_data_is_not_record_when_read_records_then_update_slice_wit
477
477
  side_effect=retriever_read_pages,
478
478
  ):
479
479
  list(retriever.read_records(stream_slice=stream_slice, records_schema={}))
480
+ cursor.observe.assert_not_called()
480
481
  cursor.close_slice.assert_called_once_with(stream_slice, None)
481
482
 
482
483