airbyte-cdk 6.37.0.dev1__py3-none-any.whl → 6.37.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. airbyte_cdk/connector_builder/models.py +16 -14
  2. airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
  3. airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
  4. airbyte_cdk/connector_builder/test_reader/types.py +9 -1
  5. airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
  6. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -7
  7. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +67 -46
  9. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +13 -2
  10. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -0
  11. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +83 -17
  12. airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
  13. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +30 -45
  14. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
  15. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +171 -70
  16. airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
  17. airbyte_cdk/sources/declarative/requesters/README.md +5 -5
  18. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +60 -17
  19. airbyte_cdk/sources/declarative/requesters/http_requester.py +7 -1
  20. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +10 -3
  21. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
  22. airbyte_cdk/sources/http_logger.py +3 -0
  23. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
  24. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  25. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
  26. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/METADATA +2 -2
  27. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/RECORD +31 -31
  28. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -136
  29. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/LICENSE_SHORT +0 -0
  31. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/WHEEL +0 -0
  32. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/declarative_component_schema.yaml

@@ -777,6 +777,44 @@ definitions:
       type:
         type: string
         enum: [LegacyToPerPartitionStateMigration]
+  IncrementingCountCursor:
+    title: Incrementing Count Cursor
+    description: Cursor that allows for incremental sync according to a continuously increasing integer.
+    type: object
+    required:
+      - type
+      - cursor_field
+    properties:
+      type:
+        type: string
+        enum: [IncrementingCountCursor]
+      cursor_field:
+        title: Cursor Field
+        description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
+        type: string
+        interpolation_context:
+          - config
+        examples:
+          - "created_at"
+          - "{{ config['record_cursor'] }}"
+      start_value:
+        title: Start Value
+        description: The value that determines the earliest record that should be synced.
+        anyOf:
+          - type: string
+          - type: integer
+        interpolation_context:
+          - config
+        examples:
+          - 0
+          - "{{ config['start_value'] }}"
+      start_value_option:
+        title: Inject Start Value Into Outgoing HTTP Request
+        description: Optionally configures how the start value will be sent in requests to the source API.
+        "$ref": "#/definitions/RequestOption"
+      $parameters:
+        type: object
+        additionalProperties: true
   DatetimeBasedCursor:
     title: Datetime Based Cursor
     description: Cursor to provide incremental capabilities over datetime.
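The new cursor plugs into a stream's `incremental_sync`. A minimal sketch of a conforming manifest fragment, expressed as a Python dict (the field values and the `since_id` parameter are illustrative, not taken from this diff):

```python
# Hypothetical manifest fragment using the new IncrementingCountCursor.
# Only the component fields come from the schema above; values are examples.
incremental_sync = {
    "type": "IncrementingCountCursor",
    "cursor_field": "id",
    "start_value": "{{ config['start_value'] }}",
    "start_value_option": {
        "type": "RequestOption",
        "inject_into": "request_parameter",
        "field_name": "since_id",
    },
}
```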
@@ -844,6 +882,7 @@ definitions:
       * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
       * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
       * **%f**: Microsecond (zero-padded to 6 digits) - `000000`
+      * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`
       * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
       * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
       * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
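`%_ms` is not a standard `strftime` directive, so one way to see the intended output is to expand the token by hand before formatting. A standalone sketch (this helper is illustrative, not the CDK's parser):

```python
from datetime import datetime, timezone

def format_with_ms(dt: datetime, fmt: str) -> str:
    # Illustrative: substitute %_ms with the zero-padded millisecond
    # component, then let strftime handle the remaining directives.
    ms = f"{dt.microsecond // 1000:03d}"
    return dt.strftime(fmt.replace("%_ms", ms))

dt = datetime(2024, 1, 2, 3, 4, 5, 123456, tzinfo=timezone.utc)
print(format_with_ms(dt, "%Y-%m-%dT%H:%M:%S.%_ms"))  # 2024-01-02T03:04:05.123
```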
@@ -1318,6 +1357,7 @@ definitions:
         anyOf:
           - "$ref": "#/definitions/CustomIncrementalSync"
           - "$ref": "#/definitions/DatetimeBasedCursor"
+          - "$ref": "#/definitions/IncrementingCountCursor"
       name:
         title: Name
         description: The stream name.
@@ -1490,7 +1530,11 @@ definitions:
       limit:
         title: Limit
         description: The maximum number of calls allowed within the interval.
-        type: integer
+        anyOf:
+          - type: integer
+          - type: string
+        interpolation_context:
+          - config
       interval:
         title: Interval
         description: The time interval for the rate limit.
@@ -1775,6 +1819,9 @@ definitions:
         - stream_interval
         - stream_partition
         - stream_slice
+        - creation_response
+        - polling_response
+        - download_target
       examples:
         - "/products"
         - "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
@@ -2394,6 +2441,7 @@ definitions:
       * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
       * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
       * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`
+      * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`
       * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
       * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
       * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
@@ -3130,14 +3178,12 @@ definitions:
           - "$ref": "#/definitions/CustomPartitionRouter"
           - "$ref": "#/definitions/ListPartitionRouter"
           - "$ref": "#/definitions/SubstreamPartitionRouter"
-          - "$ref": "#/definitions/GroupingPartitionRouter"
           - type: array
             items:
               anyOf:
                 - "$ref": "#/definitions/CustomPartitionRouter"
                 - "$ref": "#/definitions/ListPartitionRouter"
                 - "$ref": "#/definitions/SubstreamPartitionRouter"
-                - "$ref": "#/definitions/GroupingPartitionRouter"
       decoder:
         title: Decoder
         description: Component decoding the response so records can be extracted.
@@ -3221,7 +3267,7 @@ definitions:
       - polling_requester
       - download_requester
       - status_extractor
-      - urls_extractor
+      - download_target_extractor
     properties:
       type:
         type: string
@@ -3238,7 +3284,7 @@ definitions:
         anyOf:
           - "$ref": "#/definitions/CustomRecordExtractor"
           - "$ref": "#/definitions/DpathExtractor"
-      urls_extractor:
+      download_target_extractor:
         description: Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.
         anyOf:
           - "$ref": "#/definitions/CustomRecordExtractor"
@@ -3259,7 +3305,7 @@ definitions:
         anyOf:
           - "$ref": "#/definitions/CustomRequester"
           - "$ref": "#/definitions/HttpRequester"
-      url_requester:
+      download_target_requester:
         description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
         anyOf:
           - "$ref": "#/definitions/CustomRequester"
@@ -3292,14 +3338,12 @@ definitions:
           - "$ref": "#/definitions/CustomPartitionRouter"
           - "$ref": "#/definitions/ListPartitionRouter"
           - "$ref": "#/definitions/SubstreamPartitionRouter"
-          - "$ref": "#/definitions/GroupingPartitionRouter"
           - type: array
             items:
               anyOf:
                 - "$ref": "#/definitions/CustomPartitionRouter"
                 - "$ref": "#/definitions/ListPartitionRouter"
                 - "$ref": "#/definitions/SubstreamPartitionRouter"
-                - "$ref": "#/definitions/GroupingPartitionRouter"
       decoder:
         title: Decoder
         description: Component decoding the response so records can be extracted.
@@ -3416,44 +3460,6 @@ definitions:
       $parameters:
         type: object
         additionalProperties: true
-  GroupingPartitionRouter:
-    title: Grouping Partition Router
-    description: >
-      A decorator on top of a partition router that groups partitions into batches of a specified size.
-      This is useful for APIs that support filtering by multiple partition keys in a single request.
-      Note that per-partition incremental syncs may not work as expected because the grouping
-      of partitions might change between syncs, potentially leading to inconsistent state tracking.
-    type: object
-    required:
-      - type
-      - group_size
-      - underlying_partition_router
-    properties:
-      type:
-        type: string
-        enum: [GroupingPartitionRouter]
-      group_size:
-        title: Group Size
-        description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
-        type: integer
-        examples:
-          - 10
-          - 50
-      underlying_partition_router:
-        title: Underlying Partition Router
-        description: The partition router whose output will be grouped. This can be any valid partition router component.
-        anyOf:
-          - "$ref": "#/definitions/CustomPartitionRouter"
-          - "$ref": "#/definitions/ListPartitionRouter"
-          - "$ref": "#/definitions/SubstreamPartitionRouter"
-      deduplicate:
-        title: Deduplicate Partitions
-        description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
-        type: boolean
-        default: true
-      $parameters:
-        type: object
-        additionalProperties: true
   WaitUntilTimeFromHeader:
     title: Wait Until Time Defined In Response Header
     description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
@@ -3705,6 +3711,21 @@ interpolation:
        self: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=
        next: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2
        count: 82
+  - title: creation_response
+    description: The response received from the creation_requester in the AsyncRetriever component.
+    type: object
+    examples:
+      - id: "1234"
+  - title: polling_response
+    description: The response received from the polling_requester in the AsyncRetriever component.
+    type: object
+    examples:
+      - id: "1234"
+  - title: download_target
+    description: The `URL` received from the polling_requester in the AsyncRetriever with jobStatus as `COMPLETED`.
+    type: string
+    examples:
+      - "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2&filename=xxx_yyy_zzz.csv"
   - title: stream_interval
     description: The current stream interval being processed. The keys are defined by the incremental sync component. Default keys are `start_time` and `end_time`.
     type: object
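These three variables exist for `AsyncRetriever` flows. A hedged sketch of how `download_target` might be referenced from a download requester, expressed as a Python dict (the component layout is illustrative, not taken from this diff):

```python
# Illustrative AsyncRetriever fragment: the download requester interpolates
# the download_target extracted from the completed polling response.
download_requester = {
    "type": "HttpRequester",
    "url_base": "{{ download_target }}",
    "http_method": "GET",
}
```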
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py

@@ -107,6 +107,16 @@ class CsvParser(Parser):
     encoding: Optional[str] = "utf-8"
     delimiter: Optional[str] = ","

+    def _get_delimiter(self) -> Optional[str]:
+        """
+        Get delimiter from the configuration. Check for the escape character and decode it.
+        """
+        if self.delimiter is not None:
+            if self.delimiter.startswith("\\"):
+                self.delimiter = self.delimiter.encode("utf-8").decode("unicode_escape")
+
+        return self.delimiter
+
     def parse(
         self,
         data: BufferedIOBase,
@@ -115,8 +125,9 @@ class CsvParser(Parser):
         Parse CSV data from decompressed bytes.
         """
         text_data = TextIOWrapper(data, encoding=self.encoding)  # type: ignore
-        reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
-        yield from reader
+        reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
+        for row in reader:
+            yield row


 @dataclass
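`_get_delimiter` exists so a config value like `"\t"` (a literal backslash followed by `t`, as it arrives from JSON/YAML) is decoded into a real tab before reaching `csv.DictReader`. The decoding step in isolation:

```python
import csv
import io

raw_delimiter = "\\t"  # two characters, as supplied in a connector config
decoded = raw_delimiter.encode("utf-8").decode("unicode_escape")
assert decoded == "\t"  # now a single tab character

reader = csv.DictReader(io.StringIO("a\tb\n1\t2\n"), delimiter=decoded)
print(list(reader))  # [{'a': '1', 'b': '2'}]
```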
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py

@@ -136,6 +136,7 @@ class ResponseToFileExtractor(RecordExtractor):
         """

         try:
+            # TODO: Add support for other file types, like `json`, with `pd.read_json()`
             with open(path, "r", encoding=file_encoding) as data:
                 chunks = pd.read_csv(
                     data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -95,6 +95,10 @@ class ConcurrentPerPartitionCursor(Cursor):
         # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
         self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
         self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
+
+        # Parent-state tracking: store each partition's parent state in creation order
+        self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict()
+
         self._finished_partitions: set[str] = set()
         self._lock = threading.Lock()
         self._timer = Timer()
@@ -155,11 +159,62 @@ class ConcurrentPerPartitionCursor(Cursor):
             and self._semaphore_per_partition[partition_key]._value == 0
         ):
             self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
-        self._emit_state_message()
+
+        self._check_and_update_parent_state()
+
+        self._emit_state_message()
+
+    def _check_and_update_parent_state(self) -> None:
+        """
+        Pop the leftmost partition state from _partition_parent_state_map only if
+        *all partitions* up to (and including) that partition key in _semaphore_per_partition
+        are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
+        Additionally, delete finished semaphores with a value of 0 to free up memory,
+        as they are only needed to track errors and completion status.
+        """
+        last_closed_state = None
+
+        while self._partition_parent_state_map:
+            # Look at the earliest partition key in creation order
+            earliest_key = next(iter(self._partition_parent_state_map))
+
+            # Verify ALL partitions from the left up to earliest_key are finished
+            all_left_finished = True
+            for p_key, sem in list(
+                self._semaphore_per_partition.items()
+            ):  # Use list to allow modification during iteration
+                # If any earlier partition is still not finished, we must stop
+                if p_key not in self._finished_partitions or sem._value != 0:
+                    all_left_finished = False
+                    break
+                # Once we've reached earliest_key in the semaphore order, we can stop checking
+                if p_key == earliest_key:
+                    break
+
+            # If the partitions up to earliest_key are not all finished, break the while-loop
+            if not all_left_finished:
+                break
+
+            # Pop the leftmost entry from the parent-state map
+            _, closed_parent_state = self._partition_parent_state_map.popitem(last=False)
+            last_closed_state = closed_parent_state
+
+            # Clean up finished semaphores with value 0 up to and including earliest_key
+            for p_key in list(self._semaphore_per_partition.keys()):
+                sem = self._semaphore_per_partition[p_key]
+                if p_key in self._finished_partitions and sem._value == 0:
+                    del self._semaphore_per_partition[p_key]
+                    logger.debug(f"Deleted finished semaphore for partition {p_key} with value 0")
+                if p_key == earliest_key:
+                    break
+
+        # Update _parent_state if we popped at least one partition
+        if last_closed_state is not None:
+            self._parent_state = last_closed_state

     def ensure_at_least_one_state_emitted(self) -> None:
         """
-        The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
+        The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
         called.
         """
         if not any(
@@ -201,13 +256,19 @@ class ConcurrentPerPartitionCursor(Cursor):

         slices = self._partition_router.stream_slices()
         self._timer.start()
-        for partition in slices:
-            yield from self._generate_slices_from_partition(partition)
+        for partition, last, parent_state in iterate_with_last_flag_and_state(
+            slices, self._partition_router.get_stream_state
+        ):
+            yield from self._generate_slices_from_partition(partition, parent_state)

-    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
+    def _generate_slices_from_partition(
+        self, partition: StreamSlice, parent_state: Mapping[str, Any]
+    ) -> Iterable[StreamSlice]:
         # Ensure the maximum number of partitions is not exceeded
         self._ensure_partition_limit()

+        partition_key = self._to_partition_key(partition.partition)
+
         cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
         if not cursor:
             cursor = self._create_cursor(
@@ -216,18 +277,26 @@ class ConcurrentPerPartitionCursor(Cursor):
             )
             with self._lock:
                 self._number_of_partitions += 1
-                self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
-                self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
-                    threading.Semaphore(0)
-                )
+                self._cursor_per_partition[partition_key] = cursor
+                self._semaphore_per_partition[partition_key] = threading.Semaphore(0)
+
+        with self._lock:
+            if (
+                len(self._partition_parent_state_map) == 0
+                or self._partition_parent_state_map[
+                    next(reversed(self._partition_parent_state_map))
+                ]
+                != parent_state
+            ):
+                self._partition_parent_state_map[partition_key] = deepcopy(parent_state)

         for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
             cursor.stream_slices(),
             lambda: None,
         ):
-            self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
+            self._semaphore_per_partition[partition_key].release()
             if is_last_slice:
-                self._finished_partitions.add(self._to_partition_key(partition.partition))
+                self._finished_partitions.add(partition_key)
             yield StreamSlice(
                 partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
             )
@@ -257,9 +326,9 @@ class ConcurrentPerPartitionCursor(Cursor):
         while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
             # Try removing finished partitions first
             for partition_key in list(self._cursor_per_partition.keys()):
-                if (
-                    partition_key in self._finished_partitions
-                    and self._semaphore_per_partition[partition_key]._value == 0
+                if partition_key in self._finished_partitions and (
+                    partition_key not in self._semaphore_per_partition
+                    or self._semaphore_per_partition[partition_key]._value == 0
                 ):
                     oldest_partition = self._cursor_per_partition.pop(
                         partition_key
@@ -338,9 +407,6 @@ class ConcurrentPerPartitionCursor(Cursor):
                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                     self._create_cursor(state["cursor"])
                 )
-                self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
-                    threading.Semaphore(0)
-                )

             # set default state for missing partitions if it is per partition with fallback to global
             if self._GLOBAL_STATE_KEY in stream_state:
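The parent-state bookkeeping above relies on two `OrderedDict` operations: `next(reversed(d))` to peek at the most recently inserted key (to skip duplicate parent states) and `popitem(last=False)` to pop the oldest entry once every partition up to it has finished. The FIFO discipline in isolation:

```python
from collections import OrderedDict

parent_state_map = OrderedDict()
parent_state_map["partition_a"] = {"parent_cursor": "2024-01-01"}
parent_state_map["partition_b"] = {"parent_cursor": "2024-01-02"}

# Peek at the most recently inserted key (used to avoid storing duplicates).
assert next(reversed(parent_state_map)) == "partition_b"

# Pop the oldest entry once everything up to it is finished.
oldest_key, oldest_state = parent_state_map.popitem(last=False)
assert oldest_key == "partition_a"
assert oldest_state == {"parent_cursor": "2024-01-01"}
```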
airbyte_cdk/sources/declarative/interpolation/macros.py

@@ -173,6 +173,8 @@ def format_datetime(
     )
     if format == "%s":
         return str(int(dt_datetime.timestamp()))
+    elif format == "%ms":
+        return str(int(dt_datetime.timestamp() * 1_000_000))
     return dt_datetime.strftime(format)

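Note the arithmetic in the new branch: `%s` yields whole epoch seconds, while `%ms` multiplies the epoch timestamp by 1_000_000 (a microsecond-scale integer, despite its name). Checking the two branches in isolation:

```python
from datetime import datetime, timezone

dt = datetime(2024, 1, 1, tzinfo=timezone.utc)

# Mirrors the two added branches (standalone, not the CDK macro itself).
print(str(int(dt.timestamp())))              # "%s"  -> 1704067200
print(str(int(dt.timestamp() * 1_000_000)))  # "%ms" -> 1704067200000000
```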
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -646,7 +646,7 @@ class Rate(BaseModel):
     class Config:
         extra = Extra.allow

-    limit: int = Field(
+    limit: Union[int, str] = Field(
         ...,
         description="The maximum number of calls allowed within the interval.",
         title="Limit",
@@ -1508,6 +1508,28 @@ class AuthFlow(BaseModel):
     oauth_config_specification: Optional[OAuthConfigSpecification] = None


+class IncrementingCountCursor(BaseModel):
+    type: Literal["IncrementingCountCursor"]
+    cursor_field: str = Field(
+        ...,
+        description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
+        examples=["created_at", "{{ config['record_cursor'] }}"],
+        title="Cursor Field",
+    )
+    start_value: Optional[Union[str, int]] = Field(
+        None,
+        description="The value that determines the earliest record that should be synced.",
+        examples=[0, "{{ config['start_value'] }}"],
+        title="Start Value",
+    )
+    start_value_option: Optional[RequestOption] = Field(
+        None,
+        description="Optionally configures how the start value will be sent in requests to the source API.",
+        title="Inject Start Value Into Outgoing HTTP Request",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class DatetimeBasedCursor(BaseModel):
     type: Literal["DatetimeBasedCursor"]
     clamping: Optional[Clamping] = Field(
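A hedged usage sketch of the generated model (pydantic v1 style `parse_obj`, matching the `Field`/`BaseModel` conventions above; values are illustrative):

```python
# Parse a manifest fragment into the generated model (values illustrative).
cursor = IncrementingCountCursor.parse_obj(
    {
        "type": "IncrementingCountCursor",
        "cursor_field": "id",
        "start_value": 0,
    }
)
print(cursor.cursor_field, cursor.start_value)  # id 0
```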
@@ -1948,7 +1970,9 @@ class DeclarativeStream(BaseModel):
         description="Component used to coordinate how records are extracted across stream slices and request pages.",
         title="Retriever",
     )
-    incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field(
+    incremental_sync: Optional[
+        Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor]
+    ] = Field(
         None,
         description="Component used to fetch data incrementally based on a time field in the data.",
         title="Incremental Sync",
@@ -2225,15 +2249,7 @@ class SimpleRetriever(BaseModel):
                 CustomPartitionRouter,
                 ListPartitionRouter,
                 SubstreamPartitionRouter,
-                GroupingPartitionRouter,
-                List[
-                    Union[
-                        CustomPartitionRouter,
-                        ListPartitionRouter,
-                        SubstreamPartitionRouter,
-                        GroupingPartitionRouter,
-                    ]
-                ],
+                List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
             ]
         ] = Field(
             [],
@@ -2271,7 +2287,7 @@ class AsyncRetriever(BaseModel):
     status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
         ..., description="Responsible for fetching the actual status of the async job."
     )
-    urls_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
+    download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
         ...,
         description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
     )
@@ -2286,7 +2302,7 @@ class AsyncRetriever(BaseModel):
         ...,
         description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
     )
-    url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
+    download_target_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
         None,
         description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
     )
@@ -2311,15 +2327,7 @@ class AsyncRetriever(BaseModel):
                 CustomPartitionRouter,
                 ListPartitionRouter,
                 SubstreamPartitionRouter,
-                GroupingPartitionRouter,
-                List[
-                    Union[
-                        CustomPartitionRouter,
-                        ListPartitionRouter,
-                        SubstreamPartitionRouter,
-                        GroupingPartitionRouter,
-                    ]
-                ],
+                List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
             ]
         ] = Field(
             [],
@@ -2371,29 +2379,6 @@ class SubstreamPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class GroupingPartitionRouter(BaseModel):
-    type: Literal["GroupingPartitionRouter"]
-    group_size: int = Field(
-        ...,
-        description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
-        examples=[10, 50],
-        title="Group Size",
-    )
-    underlying_partition_router: Union[
-        CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
-    ] = Field(
-        ...,
-        description="The partition router whose output will be grouped. This can be any valid partition router component.",
-        title="Underlying Partition Router",
-    )
-    deduplicate: Optional[bool] = Field(
-        True,
-        description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
-        title="Deduplicate Partitions",
-    )
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
 class HttpComponentsResolver(BaseModel):
     type: Literal["HttpComponentsResolver"]
     retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py

@@ -45,7 +45,7 @@ class AirbyteCustomCodeNotPermittedError(Exception):
 def _hash_text(input_text: str, hash_type: str = "md5") -> str:
     """Return the hash of the input text using the specified hash type."""
     if not input_text:
-        raise ValueError("Input text cannot be empty.")
+        raise ValueError("Hash input text cannot be empty.")

     hash_object = CHECKSUM_FUNCTIONS[hash_type]()
     hash_object.update(input_text.encode())
@@ -68,6 +68,10 @@ def validate_python_code(

     Currently we fail if no checksums are provided, although this may change in the future.
     """
+    if not code_text:
+        # No code provided, nothing to validate.
+        return
+
     if not checksums:
         raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

@@ -77,8 +81,18 @@ def validate_python_code(
             f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
         )

-        if _hash_text(code_text, checksum_type) != checksum:
-            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
+        calculated_checksum = _hash_text(code_text, checksum_type)
+        if calculated_checksum != checksum:
+            raise AirbyteCodeTamperedError(
+                f"{checksum_type} checksum does not match."
+                + str(
+                    {
+                        "expected_checksum": checksum,
+                        "actual_checksum": calculated_checksum,
+                        "code_text": code_text,
+                    }
+                ),
+            )


 def get_registered_components_module(
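Since the error now reports both the expected and the calculated checksum, regenerating the expected value by hand is straightforward (standalone equivalent of `_hash_text` with md5):

```python
import hashlib

code_text = "def get_source():\n    ...\n"  # contents of the injected components.py
print(hashlib.md5(code_text.encode()).hexdigest())  # value to supply as the md5 checksum
```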
@@ -94,7 +108,7 @@ def get_registered_components_module(

     Returns `None` if no components is provided and the `components` module is not found.
     """
-    if config and INJECTED_COMPONENTS_PY in config:
+    if config and config.get(INJECTED_COMPONENTS_PY, None):
         if not custom_code_execution_permitted():
             raise AirbyteCustomCodeNotPermittedError
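The switch from a membership test to `config.get(...)` means a key that is present but falsy (`""` or `None`) no longer enables the custom-code path. In isolation:

```python
INJECTED_COMPONENTS_PY = "__injected_components_py"  # assumed value of the CDK constant

config = {INJECTED_COMPONENTS_PY: ""}

# Old check: key presence alone was enough to take the custom-code branch.
assert INJECTED_COMPONENTS_PY in config

# New check: an empty value behaves the same as an absent key.
assert not config.get(INJECTED_COMPONENTS_PY, None)
```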