airbyte-cdk 6.37.0.dev1__py3-none-any.whl → 6.37.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. airbyte_cdk/connector_builder/models.py +16 -14
  2. airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
  3. airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
  4. airbyte_cdk/connector_builder/test_reader/types.py +9 -1
  5. airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
  6. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -7
  7. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +77 -48
  9. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +13 -2
  10. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -0
  11. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +83 -17
  12. airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
  13. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +37 -50
  14. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
  15. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +171 -70
  16. airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
  17. airbyte_cdk/sources/declarative/requesters/README.md +5 -5
  18. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +60 -17
  19. airbyte_cdk/sources/declarative/requesters/http_requester.py +49 -17
  20. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
  21. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
  22. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
  23. airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
  24. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +10 -3
  25. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
  26. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
  27. airbyte_cdk/sources/http_logger.py +3 -0
  28. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
  29. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  30. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
  31. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
  32. airbyte_cdk/sources/types.py +1 -0
  33. airbyte_cdk/utils/mapping_helpers.py +18 -1
  34. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/METADATA +4 -4
  35. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/RECORD +39 -44
  36. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -136
  37. airbyte_cdk/sources/embedded/__init__.py +0 -3
  38. airbyte_cdk/sources/embedded/base_integration.py +0 -61
  39. airbyte_cdk/sources/embedded/catalog.py +0 -57
  40. airbyte_cdk/sources/embedded/runner.py +0 -57
  41. airbyte_cdk/sources/embedded/tools.py +0 -27
  42. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/LICENSE.txt +0 -0
  43. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/LICENSE_SHORT +0 -0
  44. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/WHEEL +0 -0
  45. {airbyte_cdk-6.37.0.dev1.dist-info → airbyte_cdk-6.37.2.dist-info}/entry_points.txt +0 -0
@@ -777,6 +777,44 @@ definitions:
777
777
  type:
778
778
  type: string
779
779
  enum: [LegacyToPerPartitionStateMigration]
780
+ IncrementingCountCursor:
781
+ title: Incrementing Count Cursor
782
+ description: Cursor that allows for incremental sync according to a continuously increasing integer.
783
+ type: object
784
+ required:
785
+ - type
786
+ - cursor_field
787
+ properties:
788
+ type:
789
+ type: string
790
+ enum: [IncrementingCountCursor]
791
+ cursor_field:
792
+ title: Cursor Field
793
+ description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
794
+ type: string
795
+ interpolation_context:
796
+ - config
797
+ examples:
798
+ - "created_at"
799
+ - "{{ config['record_cursor'] }}"
800
+ start_value:
801
+ title: Start Value
802
+ description: The value that determines the earliest record that should be synced.
803
+ anyOf:
804
+ - type: string
805
+ - type: integer
806
+ interpolation_context:
807
+ - config
808
+ examples:
809
+ - 0
810
+ - "{{ config['start_value'] }}"
811
+ start_value_option:
812
+ title: Inject Start Value Into Outgoing HTTP Request
813
+ description: Optionally configures how the start value will be sent in requests to the source API.
814
+ "$ref": "#/definitions/RequestOption"
815
+ $parameters:
816
+ type: object
817
+ additionalProperties: true
780
818
  DatetimeBasedCursor:
781
819
  title: Datetime Based Cursor
782
820
  description: Cursor to provide incremental capabilities over datetime.
@@ -844,6 +882,7 @@ definitions:
844
882
  * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
845
883
  * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
846
884
  * **%f**: Microsecond (zero-padded to 6 digits) - `000000`
885
+ * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`
847
886
  * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
848
887
  * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
849
888
  * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
@@ -1318,6 +1357,7 @@ definitions:
1318
1357
  anyOf:
1319
1358
  - "$ref": "#/definitions/CustomIncrementalSync"
1320
1359
  - "$ref": "#/definitions/DatetimeBasedCursor"
1360
+ - "$ref": "#/definitions/IncrementingCountCursor"
1321
1361
  name:
1322
1362
  title: Name
1323
1363
  description: The stream name.
@@ -1490,7 +1530,11 @@ definitions:
1490
1530
  limit:
1491
1531
  title: Limit
1492
1532
  description: The maximum number of calls allowed within the interval.
1493
- type: integer
1533
+ anyOf:
1534
+ - type: integer
1535
+ - type: string
1536
+ interpolation_context:
1537
+ - config
1494
1538
  interval:
1495
1539
  title: Interval
1496
1540
  description: The time interval for the rate limit.
@@ -1750,7 +1794,6 @@ definitions:
1750
1794
  type: object
1751
1795
  required:
1752
1796
  - type
1753
- - path
1754
1797
  - url_base
1755
1798
  properties:
1756
1799
  type:
@@ -1762,9 +1805,18 @@ definitions:
1762
1805
  type: string
1763
1806
  interpolation_context:
1764
1807
  - config
1808
+ - next_page_token
1809
+ - stream_interval
1810
+ - stream_partition
1811
+ - stream_slice
1812
+ - creation_response
1813
+ - polling_response
1814
+ - download_target
1765
1815
  examples:
1766
1816
  - "https://connect.squareup.com/v2"
1767
- - "{{ config['base_url'] or 'https://app.posthog.com'}}/api/"
1817
+ - "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
1818
+ - "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
1819
+ - "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
1768
1820
  path:
1769
1821
  title: URL Path
1770
1822
  description: Path to the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
@@ -1775,6 +1827,9 @@ definitions:
1775
1827
  - stream_interval
1776
1828
  - stream_partition
1777
1829
  - stream_slice
1830
+ - creation_response
1831
+ - polling_response
1832
+ - download_target
1778
1833
  examples:
1779
1834
  - "/products"
1780
1835
  - "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
@@ -2394,6 +2449,7 @@ definitions:
2394
2449
  * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
2395
2450
  * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
2396
2451
  * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`
2452
+ * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`
2397
2453
  * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
2398
2454
  * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
2399
2455
  * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
@@ -3130,14 +3186,12 @@ definitions:
3130
3186
  - "$ref": "#/definitions/CustomPartitionRouter"
3131
3187
  - "$ref": "#/definitions/ListPartitionRouter"
3132
3188
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3133
- - "$ref": "#/definitions/GroupingPartitionRouter"
3134
3189
  - type: array
3135
3190
  items:
3136
3191
  anyOf:
3137
3192
  - "$ref": "#/definitions/CustomPartitionRouter"
3138
3193
  - "$ref": "#/definitions/ListPartitionRouter"
3139
3194
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3140
- - "$ref": "#/definitions/GroupingPartitionRouter"
3141
3195
  decoder:
3142
3196
  title: Decoder
3143
3197
  description: Component decoding the response so records can be extracted.
@@ -3221,7 +3275,7 @@ definitions:
3221
3275
  - polling_requester
3222
3276
  - download_requester
3223
3277
  - status_extractor
3224
- - urls_extractor
3278
+ - download_target_extractor
3225
3279
  properties:
3226
3280
  type:
3227
3281
  type: string
@@ -3238,7 +3292,7 @@ definitions:
3238
3292
  anyOf:
3239
3293
  - "$ref": "#/definitions/CustomRecordExtractor"
3240
3294
  - "$ref": "#/definitions/DpathExtractor"
3241
- urls_extractor:
3295
+ download_target_extractor:
3242
3296
  description: Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.
3243
3297
  anyOf:
3244
3298
  - "$ref": "#/definitions/CustomRecordExtractor"
@@ -3259,7 +3313,7 @@ definitions:
3259
3313
  anyOf:
3260
3314
  - "$ref": "#/definitions/CustomRequester"
3261
3315
  - "$ref": "#/definitions/HttpRequester"
3262
- url_requester:
3316
+ download_target_requester:
3263
3317
  description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
3264
3318
  anyOf:
3265
3319
  - "$ref": "#/definitions/CustomRequester"
@@ -3292,14 +3346,12 @@ definitions:
3292
3346
  - "$ref": "#/definitions/CustomPartitionRouter"
3293
3347
  - "$ref": "#/definitions/ListPartitionRouter"
3294
3348
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3295
- - "$ref": "#/definitions/GroupingPartitionRouter"
3296
3349
  - type: array
3297
3350
  items:
3298
3351
  anyOf:
3299
3352
  - "$ref": "#/definitions/CustomPartitionRouter"
3300
3353
  - "$ref": "#/definitions/ListPartitionRouter"
3301
3354
  - "$ref": "#/definitions/SubstreamPartitionRouter"
3302
- - "$ref": "#/definitions/GroupingPartitionRouter"
3303
3355
  decoder:
3304
3356
  title: Decoder
3305
3357
  description: Component decoding the response so records can be extracted.
@@ -3416,44 +3468,6 @@ definitions:
3416
3468
  $parameters:
3417
3469
  type: object
3418
3470
  additionalProperties: true
3419
- GroupingPartitionRouter:
3420
- title: Grouping Partition Router
3421
- description: >
3422
- A decorator on top of a partition router that groups partitions into batches of a specified size.
3423
- This is useful for APIs that support filtering by multiple partition keys in a single request.
3424
- Note that per-partition incremental syncs may not work as expected because the grouping
3425
- of partitions might change between syncs, potentially leading to inconsistent state tracking.
3426
- type: object
3427
- required:
3428
- - type
3429
- - group_size
3430
- - underlying_partition_router
3431
- properties:
3432
- type:
3433
- type: string
3434
- enum: [GroupingPartitionRouter]
3435
- group_size:
3436
- title: Group Size
3437
- description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
3438
- type: integer
3439
- examples:
3440
- - 10
3441
- - 50
3442
- underlying_partition_router:
3443
- title: Underlying Partition Router
3444
- description: The partition router whose output will be grouped. This can be any valid partition router component.
3445
- anyOf:
3446
- - "$ref": "#/definitions/CustomPartitionRouter"
3447
- - "$ref": "#/definitions/ListPartitionRouter"
3448
- - "$ref": "#/definitions/SubstreamPartitionRouter"
3449
- deduplicate:
3450
- title: Deduplicate Partitions
3451
- description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
3452
- type: boolean
3453
- default: true
3454
- $parameters:
3455
- type: object
3456
- additionalProperties: true
3457
3471
  WaitUntilTimeFromHeader:
3458
3472
  title: Wait Until Time Defined In Response Header
3459
3473
  description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
@@ -3705,6 +3719,21 @@ interpolation:
3705
3719
  self: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=
3706
3720
  next: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2
3707
3721
  count: 82
3722
+ - title: creation_response
3723
+ description: The response received from the creation_requester in the AsyncRetriever component.
3724
+ type: object
3725
+ examples:
3726
+ - id: "1234"
3727
+ - title: polling_response
3728
+ description: The response received from the polling_requester in the AsyncRetriever component.
3729
+ type: object
3730
+ examples:
3731
+ - id: "1234"
3732
+ - title: download_target
3733
+ description: The `URL` received from the polling_requester in the AsyncRetriever with jobStatus as `COMPLETED`.
3734
+ type: string
3735
+ examples:
3736
+ - "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2&filename=xxx_yyy_zzz.csv"
3708
3737
  - title: stream_interval
3709
3738
  description: The current stream interval being processed. The keys are defined by the incremental sync component. Default keys are `start_time` and `end_time`.
3710
3739
  type: object
@@ -107,6 +107,16 @@ class CsvParser(Parser):
107
107
  encoding: Optional[str] = "utf-8"
108
108
  delimiter: Optional[str] = ","
109
109
 
110
+ def _get_delimiter(self) -> Optional[str]:
111
+ """
112
+ Get delimiter from the configuration. Check for the escape character and decode it.
113
+ """
114
+ if self.delimiter is not None:
115
+ if self.delimiter.startswith("\\"):
116
+ self.delimiter = self.delimiter.encode("utf-8").decode("unicode_escape")
117
+
118
+ return self.delimiter
119
+
110
120
  def parse(
111
121
  self,
112
122
  data: BufferedIOBase,
@@ -115,8 +125,9 @@ class CsvParser(Parser):
115
125
  Parse CSV data from decompressed bytes.
116
126
  """
117
127
  text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore
118
- reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
119
- yield from reader
128
+ reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
129
+ for row in reader:
130
+ yield row
120
131
 
121
132
 
122
133
  @dataclass
@@ -136,6 +136,7 @@ class ResponseToFileExtractor(RecordExtractor):
136
136
  """
137
137
 
138
138
  try:
139
+ # TODO: Add support for other file types, like `json`, with `pd.read_json()`
139
140
  with open(path, "r", encoding=file_encoding) as data:
140
141
  chunks = pd.read_csv(
141
142
  data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
@@ -95,6 +95,10 @@ class ConcurrentPerPartitionCursor(Cursor):
95
95
  # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
96
96
  self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
97
97
  self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
98
+
99
+ # Parent-state tracking: store each partition’s parent state in creation order
100
+ self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict()
101
+
98
102
  self._finished_partitions: set[str] = set()
99
103
  self._lock = threading.Lock()
100
104
  self._timer = Timer()
@@ -155,11 +159,62 @@ class ConcurrentPerPartitionCursor(Cursor):
155
159
  and self._semaphore_per_partition[partition_key]._value == 0
156
160
  ):
157
161
  self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
158
- self._emit_state_message()
162
+
163
+ self._check_and_update_parent_state()
164
+
165
+ self._emit_state_message()
166
+
167
+ def _check_and_update_parent_state(self) -> None:
168
+ """
169
+ Pop the leftmost partition state from _partition_parent_state_map only if
170
+ *all partitions* up to (and including) that partition key in _semaphore_per_partition
171
+ are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
172
+ Additionally, delete finished semaphores with a value of 0 to free up memory,
173
+ as they are only needed to track errors and completion status.
174
+ """
175
+ last_closed_state = None
176
+
177
+ while self._partition_parent_state_map:
178
+ # Look at the earliest partition key in creation order
179
+ earliest_key = next(iter(self._partition_parent_state_map))
180
+
181
+ # Verify ALL partitions from the left up to earliest_key are finished
182
+ all_left_finished = True
183
+ for p_key, sem in list(
184
+ self._semaphore_per_partition.items()
185
+ ): # Use list to allow modification during iteration
186
+ # If any earlier partition is still not finished, we must stop
187
+ if p_key not in self._finished_partitions or sem._value != 0:
188
+ all_left_finished = False
189
+ break
190
+ # Once we've reached earliest_key in the semaphore order, we can stop checking
191
+ if p_key == earliest_key:
192
+ break
193
+
194
+ # If the partitions up to earliest_key are not all finished, break the while-loop
195
+ if not all_left_finished:
196
+ break
197
+
198
+ # Pop the leftmost entry from parent-state map
199
+ _, closed_parent_state = self._partition_parent_state_map.popitem(last=False)
200
+ last_closed_state = closed_parent_state
201
+
202
+ # Clean up finished semaphores with value 0 up to and including earliest_key
203
+ for p_key in list(self._semaphore_per_partition.keys()):
204
+ sem = self._semaphore_per_partition[p_key]
205
+ if p_key in self._finished_partitions and sem._value == 0:
206
+ del self._semaphore_per_partition[p_key]
207
+ logger.debug(f"Deleted finished semaphore for partition {p_key} with value 0")
208
+ if p_key == earliest_key:
209
+ break
210
+
211
+ # Update _parent_state if we popped at least one partition
212
+ if last_closed_state is not None:
213
+ self._parent_state = last_closed_state
159
214
 
160
215
  def ensure_at_least_one_state_emitted(self) -> None:
161
216
  """
162
- The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
217
+ The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
163
218
  called.
164
219
  """
165
220
  if not any(
@@ -201,13 +256,19 @@ class ConcurrentPerPartitionCursor(Cursor):
201
256
 
202
257
  slices = self._partition_router.stream_slices()
203
258
  self._timer.start()
204
- for partition in slices:
205
- yield from self._generate_slices_from_partition(partition)
259
+ for partition, last, parent_state in iterate_with_last_flag_and_state(
260
+ slices, self._partition_router.get_stream_state
261
+ ):
262
+ yield from self._generate_slices_from_partition(partition, parent_state)
206
263
 
207
- def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
264
+ def _generate_slices_from_partition(
265
+ self, partition: StreamSlice, parent_state: Mapping[str, Any]
266
+ ) -> Iterable[StreamSlice]:
208
267
  # Ensure the maximum number of partitions is not exceeded
209
268
  self._ensure_partition_limit()
210
269
 
270
+ partition_key = self._to_partition_key(partition.partition)
271
+
211
272
  cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
212
273
  if not cursor:
213
274
  cursor = self._create_cursor(
@@ -216,18 +277,26 @@ class ConcurrentPerPartitionCursor(Cursor):
216
277
  )
217
278
  with self._lock:
218
279
  self._number_of_partitions += 1
219
- self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
220
- self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
221
- threading.Semaphore(0)
222
- )
280
+ self._cursor_per_partition[partition_key] = cursor
281
+ self._semaphore_per_partition[partition_key] = threading.Semaphore(0)
282
+
283
+ with self._lock:
284
+ if (
285
+ len(self._partition_parent_state_map) == 0
286
+ or self._partition_parent_state_map[
287
+ next(reversed(self._partition_parent_state_map))
288
+ ]
289
+ != parent_state
290
+ ):
291
+ self._partition_parent_state_map[partition_key] = deepcopy(parent_state)
223
292
 
224
293
  for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
225
294
  cursor.stream_slices(),
226
295
  lambda: None,
227
296
  ):
228
- self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
297
+ self._semaphore_per_partition[partition_key].release()
229
298
  if is_last_slice:
230
- self._finished_partitions.add(self._to_partition_key(partition.partition))
299
+ self._finished_partitions.add(partition_key)
231
300
  yield StreamSlice(
232
301
  partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
233
302
  )
@@ -257,9 +326,9 @@ class ConcurrentPerPartitionCursor(Cursor):
257
326
  while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
258
327
  # Try removing finished partitions first
259
328
  for partition_key in list(self._cursor_per_partition.keys()):
260
- if (
261
- partition_key in self._finished_partitions
262
- and self._semaphore_per_partition[partition_key]._value == 0
329
+ if partition_key in self._finished_partitions and (
330
+ partition_key not in self._semaphore_per_partition
331
+ or self._semaphore_per_partition[partition_key]._value == 0
263
332
  ):
264
333
  oldest_partition = self._cursor_per_partition.pop(
265
334
  partition_key
@@ -338,9 +407,6 @@ class ConcurrentPerPartitionCursor(Cursor):
338
407
  self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
339
408
  self._create_cursor(state["cursor"])
340
409
  )
341
- self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
342
- threading.Semaphore(0)
343
- )
344
410
 
345
411
  # set default state for missing partitions if it is per partition with fallback to global
346
412
  if self._GLOBAL_STATE_KEY in stream_state:
@@ -173,6 +173,8 @@ def format_datetime(
173
173
  )
174
174
  if format == "%s":
175
175
  return str(int(dt_datetime.timestamp()))
176
+ elif format == "%ms":
177
+ return str(int(dt_datetime.timestamp() * 1_000_000))
176
178
  return dt_datetime.strftime(format)
177
179
 
178
180
 
@@ -646,7 +646,7 @@ class Rate(BaseModel):
646
646
  class Config:
647
647
  extra = Extra.allow
648
648
 
649
- limit: int = Field(
649
+ limit: Union[int, str] = Field(
650
650
  ...,
651
651
  description="The maximum number of calls allowed within the interval.",
652
652
  title="Limit",
@@ -939,7 +939,7 @@ class MinMaxDatetime(BaseModel):
939
939
  )
940
940
  datetime_format: Optional[str] = Field(
941
941
  "",
942
- description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
942
+ description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
943
943
  examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
944
944
  title="Datetime Format",
945
945
  )
@@ -1508,6 +1508,28 @@ class AuthFlow(BaseModel):
1508
1508
  oauth_config_specification: Optional[OAuthConfigSpecification] = None
1509
1509
 
1510
1510
 
1511
+ class IncrementingCountCursor(BaseModel):
1512
+ type: Literal["IncrementingCountCursor"]
1513
+ cursor_field: str = Field(
1514
+ ...,
1515
+ description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
1516
+ examples=["created_at", "{{ config['record_cursor'] }}"],
1517
+ title="Cursor Field",
1518
+ )
1519
+ start_value: Optional[Union[str, int]] = Field(
1520
+ None,
1521
+ description="The value that determines the earliest record that should be synced.",
1522
+ examples=[0, "{{ config['start_value'] }}"],
1523
+ title="Start Value",
1524
+ )
1525
+ start_value_option: Optional[RequestOption] = Field(
1526
+ None,
1527
+ description="Optionally configures how the start value will be sent in requests to the source API.",
1528
+ title="Inject Start Value Into Outgoing HTTP Request",
1529
+ )
1530
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1531
+
1532
+
1511
1533
  class DatetimeBasedCursor(BaseModel):
1512
1534
  type: Literal["DatetimeBasedCursor"]
1513
1535
  clamping: Optional[Clamping] = Field(
@@ -1523,7 +1545,7 @@ class DatetimeBasedCursor(BaseModel):
1523
1545
  )
1524
1546
  datetime_format: str = Field(
1525
1547
  ...,
1526
- description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
1548
+ description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
1527
1549
  examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"],
1528
1550
  title="Outgoing Datetime Format",
1529
1551
  )
@@ -1948,7 +1970,9 @@ class DeclarativeStream(BaseModel):
1948
1970
  description="Component used to coordinate how records are extracted across stream slices and request pages.",
1949
1971
  title="Retriever",
1950
1972
  )
1951
- incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field(
1973
+ incremental_sync: Optional[
1974
+ Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor]
1975
+ ] = Field(
1952
1976
  None,
1953
1977
  description="Component used to fetch data incrementally based on a time field in the data.",
1954
1978
  title="Incremental Sync",
@@ -2048,12 +2072,14 @@ class HttpRequester(BaseModel):
2048
2072
  description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
2049
2073
  examples=[
2050
2074
  "https://connect.squareup.com/v2",
2051
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api/",
2075
+ "{{ config['base_url'] or 'https://app.posthog.com'}}/api",
2076
+ "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups",
2077
+ "https://example.com/api/v1/resource/{{ next_page_token['id'] }}",
2052
2078
  ],
2053
2079
  title="API Base URL",
2054
2080
  )
2055
- path: str = Field(
2056
- ...,
2081
+ path: Optional[str] = Field(
2082
+ None,
2057
2083
  description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
2058
2084
  examples=[
2059
2085
  "/products",
@@ -2225,15 +2251,7 @@ class SimpleRetriever(BaseModel):
2225
2251
  CustomPartitionRouter,
2226
2252
  ListPartitionRouter,
2227
2253
  SubstreamPartitionRouter,
2228
- GroupingPartitionRouter,
2229
- List[
2230
- Union[
2231
- CustomPartitionRouter,
2232
- ListPartitionRouter,
2233
- SubstreamPartitionRouter,
2234
- GroupingPartitionRouter,
2235
- ]
2236
- ],
2254
+ List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2237
2255
  ]
2238
2256
  ] = Field(
2239
2257
  [],
@@ -2271,7 +2289,7 @@ class AsyncRetriever(BaseModel):
2271
2289
  status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
2272
2290
  ..., description="Responsible for fetching the actual status of the async job."
2273
2291
  )
2274
- urls_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
2292
+ download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
2275
2293
  ...,
2276
2294
  description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
2277
2295
  )
@@ -2286,7 +2304,7 @@ class AsyncRetriever(BaseModel):
2286
2304
  ...,
2287
2305
  description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
2288
2306
  )
2289
- url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
2307
+ download_target_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
2290
2308
  None,
2291
2309
  description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
2292
2310
  )
@@ -2311,15 +2329,7 @@ class AsyncRetriever(BaseModel):
2311
2329
  CustomPartitionRouter,
2312
2330
  ListPartitionRouter,
2313
2331
  SubstreamPartitionRouter,
2314
- GroupingPartitionRouter,
2315
- List[
2316
- Union[
2317
- CustomPartitionRouter,
2318
- ListPartitionRouter,
2319
- SubstreamPartitionRouter,
2320
- GroupingPartitionRouter,
2321
- ]
2322
- ],
2332
+ List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2323
2333
  ]
2324
2334
  ] = Field(
2325
2335
  [],
@@ -2371,29 +2381,6 @@ class SubstreamPartitionRouter(BaseModel):
2371
2381
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2372
2382
 
2373
2383
 
2374
- class GroupingPartitionRouter(BaseModel):
2375
- type: Literal["GroupingPartitionRouter"]
2376
- group_size: int = Field(
2377
- ...,
2378
- description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
2379
- examples=[10, 50],
2380
- title="Group Size",
2381
- )
2382
- underlying_partition_router: Union[
2383
- CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
2384
- ] = Field(
2385
- ...,
2386
- description="The partition router whose output will be grouped. This can be any valid partition router component.",
2387
- title="Underlying Partition Router",
2388
- )
2389
- deduplicate: Optional[bool] = Field(
2390
- True,
2391
- description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
2392
- title="Deduplicate Partitions",
2393
- )
2394
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2395
-
2396
-
2397
2384
  class HttpComponentsResolver(BaseModel):
2398
2385
  type: Literal["HttpComponentsResolver"]
2399
2386
  retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(