airbyte-cdk 6.37.2.dev1__py3-none-any.whl → 6.37.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -7
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +72 -5
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -3
- airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +34 -8
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +119 -7
- airbyte_cdk/sources/declarative/requesters/README.md +5 -5
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +18 -13
- airbyte_cdk/sources/declarative/requesters/http_requester.py +49 -17
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
- airbyte_cdk/sources/types.py +1 -0
- airbyte_cdk/utils/mapping_helpers.py +18 -1
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/METADATA +4 -4
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/RECORD +29 -33
- airbyte_cdk/sources/embedded/__init__.py +0 -3
- airbyte_cdk/sources/embedded/base_integration.py +0 -61
- airbyte_cdk/sources/embedded/catalog.py +0 -57
- airbyte_cdk/sources/embedded/runner.py +0 -57
- airbyte_cdk/sources/embedded/tools.py +0 -27
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/entry_points.txt +0 -0
@@ -31,6 +31,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
31
31
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
32
32
|
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
33
33
|
)
|
34
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
35
|
+
IncrementingCountCursor as IncrementingCountCursorModel,
|
36
|
+
)
|
34
37
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
35
38
|
ModelToComponentFactory,
|
36
39
|
)
|
@@ -222,7 +225,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
222
225
|
and not incremental_sync_component_definition
|
223
226
|
)
|
224
227
|
|
225
|
-
if self.
|
228
|
+
if self._is_concurrent_cursor_incremental_without_partition_routing(
|
226
229
|
declarative_stream, incremental_sync_component_definition
|
227
230
|
):
|
228
231
|
stream_state = self._connector_state_manager.get_stream_state(
|
@@ -254,15 +257,26 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
254
257
|
stream_slicer=declarative_stream.retriever.stream_slicer,
|
255
258
|
)
|
256
259
|
else:
|
257
|
-
|
258
|
-
|
260
|
+
if (
|
261
|
+
incremental_sync_component_definition
|
262
|
+
and incremental_sync_component_definition.get("type")
|
263
|
+
== IncrementingCountCursorModel.__name__
|
264
|
+
):
|
265
|
+
cursor = self._constructor.create_concurrent_cursor_from_incrementing_count_cursor(
|
266
|
+
model_type=IncrementingCountCursorModel,
|
267
|
+
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
268
|
+
stream_name=declarative_stream.name,
|
269
|
+
stream_namespace=declarative_stream.namespace,
|
270
|
+
config=config or {},
|
271
|
+
)
|
272
|
+
else:
|
273
|
+
cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
259
274
|
model_type=DatetimeBasedCursorModel,
|
260
275
|
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
261
276
|
stream_name=declarative_stream.name,
|
262
277
|
stream_namespace=declarative_stream.namespace,
|
263
278
|
config=config or {},
|
264
279
|
)
|
265
|
-
)
|
266
280
|
partition_generator = StreamSlicerPartitionGenerator(
|
267
281
|
partition_factory=DeclarativePartitionFactory(
|
268
282
|
declarative_stream.name,
|
@@ -389,7 +403,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
389
403
|
|
390
404
|
return concurrent_streams, synchronous_streams
|
391
405
|
|
392
|
-
def
|
406
|
+
def _is_concurrent_cursor_incremental_without_partition_routing(
|
393
407
|
self,
|
394
408
|
declarative_stream: DeclarativeStream,
|
395
409
|
incremental_sync_component_definition: Mapping[str, Any] | None,
|
@@ -397,11 +411,18 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
397
411
|
return (
|
398
412
|
incremental_sync_component_definition is not None
|
399
413
|
and bool(incremental_sync_component_definition)
|
400
|
-
and
|
401
|
-
|
414
|
+
and (
|
415
|
+
incremental_sync_component_definition.get("type", "")
|
416
|
+
in (DatetimeBasedCursorModel.__name__, IncrementingCountCursorModel.__name__)
|
417
|
+
)
|
402
418
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
403
419
|
and (
|
404
420
|
isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
421
|
+
# IncrementingCountCursorModel is hardcoded to be of type DatetimeBasedCursor
|
422
|
+
# add isinstance check here if we want to create a Declarative IncrementingCountCursor
|
423
|
+
# or isinstance(
|
424
|
+
# declarative_stream.retriever.stream_slicer, IncrementingCountCursor
|
425
|
+
# )
|
405
426
|
or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
|
406
427
|
)
|
407
428
|
)
|
@@ -31,7 +31,8 @@ class DatetimeParser:
|
|
31
31
|
return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
|
32
32
|
elif format == "%ms":
|
33
33
|
return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
|
34
|
-
|
34
|
+
elif "%_ms" in format:
|
35
|
+
format = format.replace("%_ms", "%f")
|
35
36
|
parsed_datetime = datetime.datetime.strptime(str(date), format)
|
36
37
|
if self._is_naive(parsed_datetime):
|
37
38
|
return parsed_datetime.replace(tzinfo=datetime.timezone.utc)
|
@@ -48,6 +49,11 @@ class DatetimeParser:
|
|
48
49
|
if format == "%ms":
|
49
50
|
# timestamp() returns a float representing the number of seconds since the unix epoch
|
50
51
|
return str(int(dt.timestamp() * 1000))
|
52
|
+
if "%_ms" in format:
|
53
|
+
_format = format.replace("%_ms", "%f")
|
54
|
+
milliseconds = int(dt.microsecond / 1000)
|
55
|
+
formatted_dt = dt.strftime(_format).replace(dt.strftime("%f"), "%03d" % milliseconds)
|
56
|
+
return formatted_dt
|
51
57
|
else:
|
52
58
|
return dt.strftime(format)
|
53
59
|
|
@@ -777,6 +777,44 @@ definitions:
|
|
777
777
|
type:
|
778
778
|
type: string
|
779
779
|
enum: [LegacyToPerPartitionStateMigration]
|
780
|
+
IncrementingCountCursor:
|
781
|
+
title: Incrementing Count Cursor
|
782
|
+
description: Cursor that allows for incremental sync according to a continuously increasing integer.
|
783
|
+
type: object
|
784
|
+
required:
|
785
|
+
- type
|
786
|
+
- cursor_field
|
787
|
+
properties:
|
788
|
+
type:
|
789
|
+
type: string
|
790
|
+
enum: [IncrementingCountCursor]
|
791
|
+
cursor_field:
|
792
|
+
title: Cursor Field
|
793
|
+
description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
|
794
|
+
type: string
|
795
|
+
interpolation_context:
|
796
|
+
- config
|
797
|
+
examples:
|
798
|
+
- "created_at"
|
799
|
+
- "{{ config['record_cursor'] }}"
|
800
|
+
start_value:
|
801
|
+
title: Start Value
|
802
|
+
description: The value that determines the earliest record that should be synced.
|
803
|
+
anyOf:
|
804
|
+
- type: string
|
805
|
+
- type: integer
|
806
|
+
interpolation_context:
|
807
|
+
- config
|
808
|
+
examples:
|
809
|
+
- 0
|
810
|
+
- "{{ config['start_value'] }}"
|
811
|
+
start_value_option:
|
812
|
+
title: Inject Start Value Into Outgoing HTTP Request
|
813
|
+
description: Optionally configures how the start value will be sent in requests to the source API.
|
814
|
+
"$ref": "#/definitions/RequestOption"
|
815
|
+
$parameters:
|
816
|
+
type: object
|
817
|
+
additionalProperties: true
|
780
818
|
DatetimeBasedCursor:
|
781
819
|
title: Datetime Based Cursor
|
782
820
|
description: Cursor to provide incremental capabilities over datetime.
|
@@ -844,6 +882,7 @@ definitions:
|
|
844
882
|
* **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
|
845
883
|
* **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
|
846
884
|
* **%f**: Microsecond (zero-padded to 6 digits) - `000000`
|
885
|
+
* **%_ms**: Millisecond (zero-padded to 3 digits) - `000`
|
847
886
|
* **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
|
848
887
|
* **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
|
849
888
|
* **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
|
@@ -1318,6 +1357,7 @@ definitions:
|
|
1318
1357
|
anyOf:
|
1319
1358
|
- "$ref": "#/definitions/CustomIncrementalSync"
|
1320
1359
|
- "$ref": "#/definitions/DatetimeBasedCursor"
|
1360
|
+
- "$ref": "#/definitions/IncrementingCountCursor"
|
1321
1361
|
name:
|
1322
1362
|
title: Name
|
1323
1363
|
description: The stream name.
|
@@ -1754,7 +1794,6 @@ definitions:
|
|
1754
1794
|
type: object
|
1755
1795
|
required:
|
1756
1796
|
- type
|
1757
|
-
- path
|
1758
1797
|
- url_base
|
1759
1798
|
properties:
|
1760
1799
|
type:
|
@@ -1766,9 +1805,18 @@ definitions:
|
|
1766
1805
|
type: string
|
1767
1806
|
interpolation_context:
|
1768
1807
|
- config
|
1808
|
+
- next_page_token
|
1809
|
+
- stream_interval
|
1810
|
+
- stream_partition
|
1811
|
+
- stream_slice
|
1812
|
+
- creation_response
|
1813
|
+
- polling_response
|
1814
|
+
- download_target
|
1769
1815
|
examples:
|
1770
1816
|
- "https://connect.squareup.com/v2"
|
1771
|
-
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api
|
1817
|
+
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
|
1818
|
+
- "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
|
1819
|
+
- "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
|
1772
1820
|
path:
|
1773
1821
|
title: URL Path
|
1774
1822
|
description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
|
@@ -1779,6 +1827,9 @@ definitions:
|
|
1779
1827
|
- stream_interval
|
1780
1828
|
- stream_partition
|
1781
1829
|
- stream_slice
|
1830
|
+
- creation_response
|
1831
|
+
- polling_response
|
1832
|
+
- download_target
|
1782
1833
|
examples:
|
1783
1834
|
- "/products"
|
1784
1835
|
- "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
|
@@ -2398,6 +2449,7 @@ definitions:
|
|
2398
2449
|
* **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
|
2399
2450
|
* **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
|
2400
2451
|
* **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`
|
2452
|
+
* **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`
|
2401
2453
|
* **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`
|
2402
2454
|
* **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
|
2403
2455
|
* **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
|
@@ -3223,7 +3275,7 @@ definitions:
|
|
3223
3275
|
- polling_requester
|
3224
3276
|
- download_requester
|
3225
3277
|
- status_extractor
|
3226
|
-
-
|
3278
|
+
- download_target_extractor
|
3227
3279
|
properties:
|
3228
3280
|
type:
|
3229
3281
|
type: string
|
@@ -3240,7 +3292,7 @@ definitions:
|
|
3240
3292
|
anyOf:
|
3241
3293
|
- "$ref": "#/definitions/CustomRecordExtractor"
|
3242
3294
|
- "$ref": "#/definitions/DpathExtractor"
|
3243
|
-
|
3295
|
+
download_target_extractor:
|
3244
3296
|
description: Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.
|
3245
3297
|
anyOf:
|
3246
3298
|
- "$ref": "#/definitions/CustomRecordExtractor"
|
@@ -3261,7 +3313,7 @@ definitions:
|
|
3261
3313
|
anyOf:
|
3262
3314
|
- "$ref": "#/definitions/CustomRequester"
|
3263
3315
|
- "$ref": "#/definitions/HttpRequester"
|
3264
|
-
|
3316
|
+
download_target_requester:
|
3265
3317
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
|
3266
3318
|
anyOf:
|
3267
3319
|
- "$ref": "#/definitions/CustomRequester"
|
@@ -3667,6 +3719,21 @@ interpolation:
|
|
3667
3719
|
self: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=
|
3668
3720
|
next: https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2
|
3669
3721
|
count: 82
|
3722
|
+
- title: creation_response
|
3723
|
+
description: The response received from the creation_requester in the AsyncRetriever component.
|
3724
|
+
type: object
|
3725
|
+
examples:
|
3726
|
+
- id: "1234"
|
3727
|
+
- title: polling_response
|
3728
|
+
description: The response received from the polling_requester in the AsyncRetriever component.
|
3729
|
+
type: object
|
3730
|
+
examples:
|
3731
|
+
- id: "1234"
|
3732
|
+
- title: download_target
|
3733
|
+
description: The `URL` received from the polling_requester in the AsyncRetriever with jobStatus as `COMPLETED`.
|
3734
|
+
type: string
|
3735
|
+
examples:
|
3736
|
+
- "https://api.sendgrid.com/v3/marketing/lists?page_size=1&page_token=0236d6d2&filename=xxx_yyy_zzz.csv"
|
3670
3737
|
- title: stream_interval
|
3671
3738
|
description: The current stream interval being processed. The keys are defined by the incremental sync component. Default keys are `start_time` and `end_time`.
|
3672
3739
|
type: object
|
@@ -5,7 +5,7 @@ import json
|
|
5
5
|
import logging
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from dataclasses import dataclass
|
8
|
-
from io import BufferedIOBase,
|
8
|
+
from io import BufferedIOBase, TextIOWrapper
|
9
9
|
from typing import Any, Generator, MutableMapping, Optional
|
10
10
|
|
11
11
|
import orjson
|
@@ -124,8 +124,7 @@ class CsvParser(Parser):
|
|
124
124
|
"""
|
125
125
|
Parse CSV data from decompressed bytes.
|
126
126
|
"""
|
127
|
-
|
128
|
-
text_data = TextIOWrapper(bytes_data, encoding=self.encoding) # type: ignore
|
127
|
+
text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore
|
129
128
|
reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
|
130
129
|
for row in reader:
|
131
130
|
yield row
|
@@ -939,7 +939,7 @@ class MinMaxDatetime(BaseModel):
|
|
939
939
|
)
|
940
940
|
datetime_format: Optional[str] = Field(
|
941
941
|
"",
|
942
|
-
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
|
942
|
+
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
|
943
943
|
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
|
944
944
|
title="Datetime Format",
|
945
945
|
)
|
@@ -1508,6 +1508,28 @@ class AuthFlow(BaseModel):
|
|
1508
1508
|
oauth_config_specification: Optional[OAuthConfigSpecification] = None
|
1509
1509
|
|
1510
1510
|
|
1511
|
+
class IncrementingCountCursor(BaseModel):
|
1512
|
+
type: Literal["IncrementingCountCursor"]
|
1513
|
+
cursor_field: str = Field(
|
1514
|
+
...,
|
1515
|
+
description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
|
1516
|
+
examples=["created_at", "{{ config['record_cursor'] }}"],
|
1517
|
+
title="Cursor Field",
|
1518
|
+
)
|
1519
|
+
start_value: Optional[Union[str, int]] = Field(
|
1520
|
+
None,
|
1521
|
+
description="The value that determines the earliest record that should be synced.",
|
1522
|
+
examples=[0, "{{ config['start_value'] }}"],
|
1523
|
+
title="Start Value",
|
1524
|
+
)
|
1525
|
+
start_value_option: Optional[RequestOption] = Field(
|
1526
|
+
None,
|
1527
|
+
description="Optionally configures how the start value will be sent in requests to the source API.",
|
1528
|
+
title="Inject Start Value Into Outgoing HTTP Request",
|
1529
|
+
)
|
1530
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1531
|
+
|
1532
|
+
|
1511
1533
|
class DatetimeBasedCursor(BaseModel):
|
1512
1534
|
type: Literal["DatetimeBasedCursor"]
|
1513
1535
|
clamping: Optional[Clamping] = Field(
|
@@ -1523,7 +1545,7 @@ class DatetimeBasedCursor(BaseModel):
|
|
1523
1545
|
)
|
1524
1546
|
datetime_format: str = Field(
|
1525
1547
|
...,
|
1526
|
-
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
|
1548
|
+
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
|
1527
1549
|
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"],
|
1528
1550
|
title="Outgoing Datetime Format",
|
1529
1551
|
)
|
@@ -1948,7 +1970,9 @@ class DeclarativeStream(BaseModel):
|
|
1948
1970
|
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
1949
1971
|
title="Retriever",
|
1950
1972
|
)
|
1951
|
-
incremental_sync: Optional[
|
1973
|
+
incremental_sync: Optional[
|
1974
|
+
Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor]
|
1975
|
+
] = Field(
|
1952
1976
|
None,
|
1953
1977
|
description="Component used to fetch data incrementally based on a time field in the data.",
|
1954
1978
|
title="Incremental Sync",
|
@@ -2048,12 +2072,14 @@ class HttpRequester(BaseModel):
|
|
2048
2072
|
description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
|
2049
2073
|
examples=[
|
2050
2074
|
"https://connect.squareup.com/v2",
|
2051
|
-
"{{ config['base_url'] or 'https://app.posthog.com'}}/api
|
2075
|
+
"{{ config['base_url'] or 'https://app.posthog.com'}}/api",
|
2076
|
+
"https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups",
|
2077
|
+
"https://example.com/api/v1/resource/{{ next_page_token['id'] }}",
|
2052
2078
|
],
|
2053
2079
|
title="API Base URL",
|
2054
2080
|
)
|
2055
|
-
path: str = Field(
|
2056
|
-
|
2081
|
+
path: Optional[str] = Field(
|
2082
|
+
None,
|
2057
2083
|
description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
|
2058
2084
|
examples=[
|
2059
2085
|
"/products",
|
@@ -2263,7 +2289,7 @@ class AsyncRetriever(BaseModel):
|
|
2263
2289
|
status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
|
2264
2290
|
..., description="Responsible for fetching the actual status of the async job."
|
2265
2291
|
)
|
2266
|
-
|
2292
|
+
download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
|
2267
2293
|
...,
|
2268
2294
|
description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
|
2269
2295
|
)
|
@@ -2278,7 +2304,7 @@ class AsyncRetriever(BaseModel):
|
|
2278
2304
|
...,
|
2279
2305
|
description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
|
2280
2306
|
)
|
2281
|
-
|
2307
|
+
download_target_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
|
2282
2308
|
None,
|
2283
2309
|
description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
|
2284
2310
|
)
|
@@ -45,7 +45,7 @@ class AirbyteCustomCodeNotPermittedError(Exception):
|
|
45
45
|
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
|
46
46
|
"""Return the hash of the input text using the specified hash type."""
|
47
47
|
if not input_text:
|
48
|
-
raise ValueError("
|
48
|
+
raise ValueError("Hash input text cannot be empty.")
|
49
49
|
|
50
50
|
hash_object = CHECKSUM_FUNCTIONS[hash_type]()
|
51
51
|
hash_object.update(input_text.encode())
|
@@ -68,6 +68,10 @@ def validate_python_code(
|
|
68
68
|
|
69
69
|
Currently we fail if no checksums are provided, although this may change in the future.
|
70
70
|
"""
|
71
|
+
if not code_text:
|
72
|
+
# No code provided, nothing to validate.
|
73
|
+
return
|
74
|
+
|
71
75
|
if not checksums:
|
72
76
|
raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")
|
73
77
|
|
@@ -77,8 +81,18 @@ def validate_python_code(
|
|
77
81
|
f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
|
78
82
|
)
|
79
83
|
|
80
|
-
|
81
|
-
|
84
|
+
calculated_checksum = _hash_text(code_text, checksum_type)
|
85
|
+
if calculated_checksum != checksum:
|
86
|
+
raise AirbyteCodeTamperedError(
|
87
|
+
f"{checksum_type} checksum does not match."
|
88
|
+
+ str(
|
89
|
+
{
|
90
|
+
"expected_checksum": checksum,
|
91
|
+
"actual_checksum": calculated_checksum,
|
92
|
+
"code_text": code_text,
|
93
|
+
}
|
94
|
+
),
|
95
|
+
)
|
82
96
|
|
83
97
|
|
84
98
|
def get_registered_components_module(
|
@@ -94,7 +108,7 @@ def get_registered_components_module(
|
|
94
108
|
|
95
109
|
Returns `None` if no components is provided and the `components` module is not found.
|
96
110
|
"""
|
97
|
-
if config and INJECTED_COMPONENTS_PY
|
111
|
+
if config and config.get(INJECTED_COMPONENTS_PY, None):
|
98
112
|
if not custom_code_execution_permitted():
|
99
113
|
raise AirbyteCustomCodeNotPermittedError
|
100
114
|
|
@@ -245,6 +245,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
245
245
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
246
246
|
HttpResponseFilter as HttpResponseFilterModel,
|
247
247
|
)
|
248
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
249
|
+
IncrementingCountCursor as IncrementingCountCursorModel,
|
250
|
+
)
|
248
251
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
249
252
|
InlineSchemaLoader as InlineSchemaLoaderModel,
|
250
253
|
)
|
@@ -496,6 +499,9 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
|
|
496
499
|
CustomFormatConcurrentStreamStateConverter,
|
497
500
|
DateTimeStreamStateConverter,
|
498
501
|
)
|
502
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
|
503
|
+
IncrementingCountStreamStateConverter,
|
504
|
+
)
|
499
505
|
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
|
500
506
|
from airbyte_cdk.sources.types import Config
|
501
507
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
@@ -584,6 +590,7 @@ class ModelToComponentFactory:
|
|
584
590
|
FlattenFieldsModel: self.create_flatten_fields,
|
585
591
|
DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
|
586
592
|
IterableDecoderModel: self.create_iterable_decoder,
|
593
|
+
IncrementingCountCursorModel: self.create_incrementing_count_cursor,
|
587
594
|
XmlDecoderModel: self.create_xml_decoder,
|
588
595
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
589
596
|
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
@@ -1189,6 +1196,70 @@ class ModelToComponentFactory:
|
|
1189
1196
|
clamping_strategy=clamping_strategy,
|
1190
1197
|
)
|
1191
1198
|
|
1199
|
+
def create_concurrent_cursor_from_incrementing_count_cursor(
|
1200
|
+
self,
|
1201
|
+
model_type: Type[BaseModel],
|
1202
|
+
component_definition: ComponentDefinition,
|
1203
|
+
stream_name: str,
|
1204
|
+
stream_namespace: Optional[str],
|
1205
|
+
config: Config,
|
1206
|
+
message_repository: Optional[MessageRepository] = None,
|
1207
|
+
**kwargs: Any,
|
1208
|
+
) -> ConcurrentCursor:
|
1209
|
+
# Per-partition incremental streams can dynamically create child cursors which will pass their current
|
1210
|
+
# state via the stream_state keyword argument. Incremental syncs without parent streams use the
|
1211
|
+
# incoming state and connector_state_manager that is initialized when the component factory is created
|
1212
|
+
stream_state = (
|
1213
|
+
self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
|
1214
|
+
if "stream_state" not in kwargs
|
1215
|
+
else kwargs["stream_state"]
|
1216
|
+
)
|
1217
|
+
|
1218
|
+
component_type = component_definition.get("type")
|
1219
|
+
if component_definition.get("type") != model_type.__name__:
|
1220
|
+
raise ValueError(
|
1221
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
1222
|
+
)
|
1223
|
+
|
1224
|
+
incrementing_count_cursor_model = model_type.parse_obj(component_definition)
|
1225
|
+
|
1226
|
+
if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
|
1227
|
+
raise ValueError(
|
1228
|
+
f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
|
1229
|
+
)
|
1230
|
+
|
1231
|
+
interpolated_start_value = (
|
1232
|
+
InterpolatedString.create(
|
1233
|
+
incrementing_count_cursor_model.start_value, # type: ignore
|
1234
|
+
parameters=incrementing_count_cursor_model.parameters or {},
|
1235
|
+
)
|
1236
|
+
if incrementing_count_cursor_model.start_value
|
1237
|
+
else 0
|
1238
|
+
)
|
1239
|
+
|
1240
|
+
interpolated_cursor_field = InterpolatedString.create(
|
1241
|
+
incrementing_count_cursor_model.cursor_field,
|
1242
|
+
parameters=incrementing_count_cursor_model.parameters or {},
|
1243
|
+
)
|
1244
|
+
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
1245
|
+
|
1246
|
+
connector_state_converter = IncrementingCountStreamStateConverter(
|
1247
|
+
is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
|
1248
|
+
)
|
1249
|
+
|
1250
|
+
return ConcurrentCursor(
|
1251
|
+
stream_name=stream_name,
|
1252
|
+
stream_namespace=stream_namespace,
|
1253
|
+
stream_state=stream_state,
|
1254
|
+
message_repository=message_repository or self._message_repository,
|
1255
|
+
connector_state_manager=self._connector_state_manager,
|
1256
|
+
connector_state_converter=connector_state_converter,
|
1257
|
+
cursor_field=cursor_field,
|
1258
|
+
slice_boundary_fields=None,
|
1259
|
+
start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1260
|
+
end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1261
|
+
)
|
1262
|
+
|
1192
1263
|
def _assemble_weekday(self, weekday: str) -> Weekday:
|
1193
1264
|
match weekday:
|
1194
1265
|
case "MONDAY":
|
@@ -1622,6 +1693,31 @@ class ModelToComponentFactory:
|
|
1622
1693
|
config=config,
|
1623
1694
|
parameters=model.parameters or {},
|
1624
1695
|
)
|
1696
|
+
elif model.incremental_sync and isinstance(
|
1697
|
+
model.incremental_sync, IncrementingCountCursorModel
|
1698
|
+
):
|
1699
|
+
cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore
|
1700
|
+
|
1701
|
+
start_time_option = (
|
1702
|
+
self._create_component_from_model(
|
1703
|
+
cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
|
1704
|
+
config,
|
1705
|
+
parameters=cursor_model.parameters or {},
|
1706
|
+
)
|
1707
|
+
if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
|
1708
|
+
else None
|
1709
|
+
)
|
1710
|
+
|
1711
|
+
# The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
|
1712
|
+
# the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
|
1713
|
+
partition_field_start = "start"
|
1714
|
+
|
1715
|
+
request_options_provider = DatetimeBasedRequestOptionsProvider(
|
1716
|
+
start_time_option=start_time_option,
|
1717
|
+
partition_field_start=partition_field_start,
|
1718
|
+
config=config,
|
1719
|
+
parameters=model.parameters or {},
|
1720
|
+
)
|
1625
1721
|
else:
|
1626
1722
|
request_options_provider = None
|
1627
1723
|
|
@@ -2111,6 +2207,22 @@ class ModelToComponentFactory:
|
|
2111
2207
|
stream_response=False if self._emit_connector_builder_messages else True,
|
2112
2208
|
)
|
2113
2209
|
|
2210
|
+
@staticmethod
|
2211
|
+
def create_incrementing_count_cursor(
|
2212
|
+
model: IncrementingCountCursorModel, config: Config, **kwargs: Any
|
2213
|
+
) -> DatetimeBasedCursor:
|
2214
|
+
# This should not actually get used anywhere at runtime, but needed to add this to pass checks since
|
2215
|
+
# we still parse models into components. The issue is that there's no runtime implementation of a
|
2216
|
+
# IncrementingCountCursor.
|
2217
|
+
# A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor.
|
2218
|
+
return DatetimeBasedCursor(
|
2219
|
+
cursor_field=model.cursor_field,
|
2220
|
+
datetime_format="%Y-%m-%d",
|
2221
|
+
start_datetime="2024-12-12",
|
2222
|
+
config=config,
|
2223
|
+
parameters={},
|
2224
|
+
)
|
2225
|
+
|
2114
2226
|
@staticmethod
|
2115
2227
|
def create_iterable_decoder(
|
2116
2228
|
model: IterableDecoderModel, config: Config, **kwargs: Any
|
@@ -2744,32 +2856,32 @@ class ModelToComponentFactory:
|
|
2744
2856
|
if model.delete_requester
|
2745
2857
|
else None
|
2746
2858
|
)
|
2747
|
-
|
2859
|
+
download_target_requester = (
|
2748
2860
|
self._create_component_from_model(
|
2749
|
-
model=model.url_requester,
|
2861
|
+
model=model.download_target_requester,
|
2750
2862
|
decoder=decoder,
|
2751
2863
|
config=config,
|
2752
2864
|
name=f"job extract_url - {name}",
|
2753
2865
|
)
|
2754
|
-
if model.url_requester
|
2866
|
+
if model.download_target_requester
|
2755
2867
|
else None
|
2756
2868
|
)
|
2757
2869
|
status_extractor = self._create_component_from_model(
|
2758
2870
|
model=model.status_extractor, decoder=decoder, config=config, name=name
|
2759
2871
|
)
|
2760
|
-
|
2761
|
-
model=model.urls_extractor, decoder=decoder, config=config, name=name
|
2872
|
+
download_target_extractor = self._create_component_from_model(
|
2873
|
+
model=model.download_target_extractor, decoder=decoder, config=config, name=name
|
2762
2874
|
)
|
2763
2875
|
job_repository: AsyncJobRepository = AsyncHttpJobRepository(
|
2764
2876
|
creation_requester=creation_requester,
|
2765
2877
|
polling_requester=polling_requester,
|
2766
2878
|
download_retriever=download_retriever,
|
2767
|
-
|
2879
|
+
download_target_requester=download_target_requester,
|
2768
2880
|
abort_requester=abort_requester,
|
2769
2881
|
delete_requester=delete_requester,
|
2770
2882
|
status_extractor=status_extractor,
|
2771
2883
|
status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
|
2772
|
-
|
2884
|
+
download_target_extractor=download_target_extractor,
|
2773
2885
|
)
|
2774
2886
|
|
2775
2887
|
async_job_partition_router = AsyncJobPartitionRouter(
|