airbyte-cdk 6.45.10__py3-none-any.whl → 6.46.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/__init__.py +9 -1
- airbyte_cdk/cli/airbyte_cdk/__init__.py +86 -0
- airbyte_cdk/cli/airbyte_cdk/_connector.py +179 -0
- airbyte_cdk/cli/airbyte_cdk/_image.py +95 -0
- airbyte_cdk/cli/airbyte_cdk/_manifest.py +24 -0
- airbyte_cdk/cli/airbyte_cdk/_secrets.py +150 -0
- airbyte_cdk/cli/airbyte_cdk/_util.py +43 -0
- airbyte_cdk/cli/airbyte_cdk/_version.py +13 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +10 -0
- airbyte_cdk/models/connector_metadata.py +97 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +108 -79
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +122 -45
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +87 -82
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +9 -0
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py +462 -0
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +2 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -24
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +17 -1
- airbyte_cdk/test/standard_tests/connector_base.py +51 -25
- airbyte_cdk/test/standard_tests/declarative_sources.py +3 -1
- airbyte_cdk/test/standard_tests/test_resources.py +69 -0
- airbyte_cdk/test/standard_tests/util.py +79 -0
- airbyte_cdk/utils/docker.py +337 -0
- airbyte_cdk/utils/docker_image_templates.py +101 -0
- {airbyte_cdk-6.45.10.dist-info → airbyte_cdk-6.46.1.dist-info}/METADATA +6 -1
- {airbyte_cdk-6.45.10.dist-info → airbyte_cdk-6.46.1.dist-info}/RECORD +31 -18
- {airbyte_cdk-6.45.10.dist-info → airbyte_cdk-6.46.1.dist-info}/entry_points.txt +1 -0
- {airbyte_cdk-6.45.10.dist-info → airbyte_cdk-6.46.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.10.dist-info → airbyte_cdk-6.46.1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.10.dist-info → airbyte_cdk-6.46.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
"""Models to represent the structure of a `metadata.yaml` file."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from enum import Enum
|
6
|
+
from pathlib import Path
|
7
|
+
|
8
|
+
import yaml
|
9
|
+
from pydantic import BaseModel, Field
|
10
|
+
|
11
|
+
|
12
|
+
class ConnectorLanguage(str, Enum):
    """Implementation language of a connector.

    Members are ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Languages that have a dedicated member.
    PYTHON = "python"
    JAVA = "java"
    LOW_CODE = "low-code"
    MANIFEST_ONLY = "manifest-only"

    # Catch-all member for anything else.
    UNKNOWN = "unknown"
|
20
|
+
|
21
|
+
|
22
|
+
class ConnectorBuildOptions(BaseModel):
    """Build settings declared for a connector in metadata.yaml.

    The model is configured with ``extra="allow"``, so keys that are not
    declared below are accepted rather than rejected.
    """

    model_config = {"extra": "allow"}

    # Both declared fields are optional and default to None.
    baseImage: str | None = Field(
        default=None,
        description="Base image to use for building the connector",
    )
    path: str | None = Field(
        default=None,
        description="Path to the connector code within the repository",
    )
|
35
|
+
|
36
|
+
|
37
|
+
class ConnectorMetadata(BaseModel):
    """The ``data`` section of a connector's metadata.yaml.

    The model is configured with ``extra="allow"``, so keys that are not
    declared below are accepted rather than rejected.
    """

    model_config = {"extra": "allow"}

    # Required fields.
    dockerRepository: str = Field(..., description="Docker repository for the connector image")
    dockerImageTag: str = Field(..., description="Docker image tag for the connector")

    # Optional fields.
    tags: list[str] = Field(
        default=[],
        description="List of tags for the connector",
    )
    connectorBuildOptions: ConnectorBuildOptions | None = Field(
        None, description="Options for building the connector"
    )

    @property
    def language(self) -> ConnectorLanguage:
        """Derive the connector language from the ``language:<name>`` tags.

        Tags are scanned in order and the first one naming a recognized
        language wins. A ``language:`` tag with an unrecognized name is
        skipped so that a later tag can still match.

        Returns:
            The matching ``ConnectorLanguage`` member, or
            ``ConnectorLanguage.UNKNOWN`` when no tag matches.
        """
        recognized = {
            "python": ConnectorLanguage.PYTHON,
            "java": ConnectorLanguage.JAVA,
            "low-code": ConnectorLanguage.LOW_CODE,
            "manifest-only": ConnectorLanguage.MANIFEST_ONLY,
        }
        for tag in self.tags:
            if not tag.startswith("language:"):
                continue
            # Everything after the first colon is the language name.
            resolved = recognized.get(tag.split(":", 1)[1])
            if resolved is not None:
                return resolved

        return ConnectorLanguage.UNKNOWN
|
70
|
+
|
71
|
+
|
72
|
+
class MetadataFile(BaseModel):
    """Represents the structure of a metadata.yaml file.

    The model is configured with ``extra="allow"``, so top-level keys
    other than ``data`` are accepted rather than rejected.
    """

    model_config = {"extra": "allow"}

    data: ConnectorMetadata = Field(..., description="Connector metadata")

    @classmethod
    def from_file(
        cls,
        file_path: Path,
    ) -> MetadataFile:
        """Load and validate metadata from a YAML file.

        Args:
            file_path: Location of the metadata.yaml file to read.

        Returns:
            The validated model built from the file's contents.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the YAML document is empty, is not a mapping,
                or has no top-level ``data`` key.
        """
        if not file_path.exists():
            raise FileNotFoundError(f"Metadata file not found: {file_path!s}")

        # Read with an explicit encoding so the result does not depend on
        # the platform's default locale.
        metadata_content = file_path.read_text(encoding="utf-8")
        metadata_dict = yaml.safe_load(metadata_content)

        # The isinstance guard also covers empty documents (None) and
        # scalar roots such as a bare integer, for which a plain
        # ``"data" not in ...`` membership test would raise TypeError.
        if not isinstance(metadata_dict, dict) or "data" not in metadata_dict:
            raise ValueError(
                f"Invalid metadata format: missing 'data' field in YAML file '{file_path!s}'"
            )

        # Validate through ``cls`` so subclasses get an instance of their
        # own type back.
        return cls.model_validate(metadata_dict)
|
@@ -440,7 +440,9 @@ definitions:
|
|
440
440
|
description: Backoff time in seconds.
|
441
441
|
anyOf:
|
442
442
|
- type: number
|
443
|
+
title: Number of seconds
|
443
444
|
- type: string
|
445
|
+
title: Interpolated Value
|
444
446
|
interpolation_context:
|
445
447
|
- config
|
446
448
|
examples:
|
@@ -1057,15 +1059,18 @@ definitions:
|
|
1057
1059
|
type: string
|
1058
1060
|
enum: [JwtAuthenticator]
|
1059
1061
|
secret_key:
|
1062
|
+
title: Secret Key
|
1060
1063
|
type: string
|
1061
1064
|
description: Secret used to sign the JSON web token.
|
1062
1065
|
examples:
|
1063
1066
|
- "{{ config['secret_key'] }}"
|
1064
1067
|
base64_encode_secret_key:
|
1068
|
+
title: Base64-encode Secret Key
|
1065
1069
|
type: boolean
|
1066
1070
|
description: When set to true, the secret key will be base64 encoded prior to being encoded as part of the JWT. Only set to "true" when required by the API.
|
1067
1071
|
default: False
|
1068
1072
|
algorithm:
|
1073
|
+
title: Algorithm
|
1069
1074
|
type: string
|
1070
1075
|
description: Algorithm used to sign the JSON web token.
|
1071
1076
|
enum:
|
@@ -1389,27 +1394,27 @@ definitions:
|
|
1389
1394
|
type:
|
1390
1395
|
type: string
|
1391
1396
|
enum: [DeclarativeStream]
|
1397
|
+
name:
|
1398
|
+
title: Name
|
1399
|
+
description: The stream name.
|
1400
|
+
type: string
|
1401
|
+
default: ""
|
1402
|
+
example:
|
1403
|
+
- "Users"
|
1392
1404
|
retriever:
|
1393
1405
|
title: Retriever
|
1394
1406
|
description: Component used to coordinate how records are extracted across stream slices and request pages.
|
1395
1407
|
anyOf:
|
1408
|
+
- "$ref": "#/definitions/SimpleRetriever"
|
1396
1409
|
- "$ref": "#/definitions/AsyncRetriever"
|
1397
1410
|
- "$ref": "#/definitions/CustomRetriever"
|
1398
|
-
- "$ref": "#/definitions/SimpleRetriever"
|
1399
1411
|
incremental_sync:
|
1400
1412
|
title: Incremental Sync
|
1401
1413
|
description: Component used to fetch data incrementally based on a time field in the data.
|
1402
1414
|
anyOf:
|
1403
|
-
- "$ref": "#/definitions/CustomIncrementalSync"
|
1404
1415
|
- "$ref": "#/definitions/DatetimeBasedCursor"
|
1405
1416
|
- "$ref": "#/definitions/IncrementingCountCursor"
|
1406
|
-
|
1407
|
-
title: Name
|
1408
|
-
description: The stream name.
|
1409
|
-
type: string
|
1410
|
-
default: ""
|
1411
|
-
example:
|
1412
|
-
- "Users"
|
1417
|
+
- "$ref": "#/definitions/CustomIncrementalSync"
|
1413
1418
|
primary_key:
|
1414
1419
|
title: Primary Key
|
1415
1420
|
description: The primary key of the stream.
|
@@ -1419,8 +1424,8 @@ definitions:
|
|
1419
1424
|
title: Schema Loader
|
1420
1425
|
description: Component used to retrieve the schema for the current stream.
|
1421
1426
|
anyOf:
|
1422
|
-
- "$ref": "#/definitions/DynamicSchemaLoader"
|
1423
1427
|
- "$ref": "#/definitions/InlineSchemaLoader"
|
1428
|
+
- "$ref": "#/definitions/DynamicSchemaLoader"
|
1424
1429
|
- "$ref": "#/definitions/JsonFileSchemaLoader"
|
1425
1430
|
- "$ref": "#/definitions/CustomSchemaLoader"
|
1426
1431
|
# TODO we have move the transformation to the RecordSelector level in the code but kept this here for
|
@@ -1484,6 +1489,9 @@ definitions:
|
|
1484
1489
|
examples:
|
1485
1490
|
- "{{ record.id }}/{{ record.file_name }}/"
|
1486
1491
|
- "{{ record.id }}_{{ record.file_name }}/"
|
1492
|
+
$parameters:
|
1493
|
+
type: object
|
1494
|
+
additionalProperties: true
|
1487
1495
|
$parameters:
|
1488
1496
|
type: object
|
1489
1497
|
additional_properties: true
|
@@ -1709,13 +1717,15 @@ definitions:
|
|
1709
1717
|
title: Pagination Strategy
|
1710
1718
|
description: Strategy defining how records are paginated.
|
1711
1719
|
anyOf:
|
1720
|
+
- "$ref": "#/definitions/PageIncrement"
|
1721
|
+
- "$ref": "#/definitions/OffsetIncrement"
|
1712
1722
|
- "$ref": "#/definitions/CursorPagination"
|
1713
1723
|
- "$ref": "#/definitions/CustomPaginationStrategy"
|
1714
|
-
- "$ref": "#/definitions/OffsetIncrement"
|
1715
|
-
- "$ref": "#/definitions/PageIncrement"
|
1716
1724
|
page_size_option:
|
1725
|
+
title: Inject Page Size Into Outgoing HTTP Request
|
1717
1726
|
"$ref": "#/definitions/RequestOption"
|
1718
1727
|
page_token_option:
|
1728
|
+
title: Inject Page Token Into Outgoing HTTP Request
|
1719
1729
|
anyOf:
|
1720
1730
|
- "$ref": "#/definitions/RequestOption"
|
1721
1731
|
- "$ref": "#/definitions/RequestPath"
|
@@ -1813,6 +1823,8 @@ definitions:
|
|
1813
1823
|
type: object
|
1814
1824
|
additionalProperties: true
|
1815
1825
|
SessionTokenAuthenticator:
|
1826
|
+
title: Session Token Authenticator
|
1827
|
+
description: Authenticator for requests using the session token as an API key that's injected into the request.
|
1816
1828
|
type: object
|
1817
1829
|
required:
|
1818
1830
|
- type
|
@@ -1905,8 +1917,9 @@ definitions:
|
|
1905
1917
|
type: string
|
1906
1918
|
enum: [HttpRequester]
|
1907
1919
|
url_base:
|
1920
|
+
linkable: true
|
1908
1921
|
title: API Base URL
|
1909
|
-
description: Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
|
1922
|
+
description: The Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
|
1910
1923
|
type: string
|
1911
1924
|
interpolation_context:
|
1912
1925
|
- config
|
@@ -1924,7 +1937,7 @@ definitions:
|
|
1924
1937
|
- "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
|
1925
1938
|
path:
|
1926
1939
|
title: URL Path
|
1927
|
-
description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
|
1940
|
+
description: The Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
|
1928
1941
|
type: string
|
1929
1942
|
interpolation_context:
|
1930
1943
|
- config
|
@@ -1939,27 +1952,6 @@ definitions:
|
|
1939
1952
|
- "/products"
|
1940
1953
|
- "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
|
1941
1954
|
- "/trades/{{ config['symbol_id'] }}/history"
|
1942
|
-
authenticator:
|
1943
|
-
title: Authenticator
|
1944
|
-
description: Authentication method to use for requests sent to the API.
|
1945
|
-
anyOf:
|
1946
|
-
- "$ref": "#/definitions/ApiKeyAuthenticator"
|
1947
|
-
- "$ref": "#/definitions/BasicHttpAuthenticator"
|
1948
|
-
- "$ref": "#/definitions/BearerAuthenticator"
|
1949
|
-
- "$ref": "#/definitions/CustomAuthenticator"
|
1950
|
-
- "$ref": "#/definitions/OAuthAuthenticator"
|
1951
|
-
- "$ref": "#/definitions/JwtAuthenticator"
|
1952
|
-
- "$ref": "#/definitions/NoAuth"
|
1953
|
-
- "$ref": "#/definitions/SessionTokenAuthenticator"
|
1954
|
-
- "$ref": "#/definitions/LegacySessionTokenAuthenticator"
|
1955
|
-
- "$ref": "#/definitions/SelectiveAuthenticator"
|
1956
|
-
error_handler:
|
1957
|
-
title: Error Handler
|
1958
|
-
description: Error handler component that defines how to handle errors.
|
1959
|
-
anyOf:
|
1960
|
-
- "$ref": "#/definitions/DefaultErrorHandler"
|
1961
|
-
- "$ref": "#/definitions/CustomErrorHandler"
|
1962
|
-
- "$ref": "#/definitions/CompositeErrorHandler"
|
1963
1955
|
http_method:
|
1964
1956
|
title: HTTP Method
|
1965
1957
|
description: The HTTP method used to fetch data from the source (can be GET or POST).
|
@@ -1971,14 +1963,29 @@ definitions:
|
|
1971
1963
|
examples:
|
1972
1964
|
- GET
|
1973
1965
|
- POST
|
1966
|
+
authenticator:
|
1967
|
+
title: Authenticator
|
1968
|
+
linkable: true
|
1969
|
+
description: Authentication method to use for requests sent to the API.
|
1970
|
+
anyOf:
|
1971
|
+
- "$ref": "#/definitions/NoAuth"
|
1972
|
+
- "$ref": "#/definitions/ApiKeyAuthenticator"
|
1973
|
+
- "$ref": "#/definitions/BasicHttpAuthenticator"
|
1974
|
+
- "$ref": "#/definitions/BearerAuthenticator"
|
1975
|
+
- "$ref": "#/definitions/OAuthAuthenticator"
|
1976
|
+
- "$ref": "#/definitions/JwtAuthenticator"
|
1977
|
+
- "$ref": "#/definitions/SessionTokenAuthenticator"
|
1978
|
+
- "$ref": "#/definitions/SelectiveAuthenticator"
|
1979
|
+
- "$ref": "#/definitions/CustomAuthenticator"
|
1980
|
+
- "$ref": "#/definitions/LegacySessionTokenAuthenticator"
|
1974
1981
|
request_body_data:
|
1975
1982
|
title: Request Body Payload (Non-JSON)
|
1976
1983
|
description: Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.
|
1977
1984
|
anyOf:
|
1978
|
-
- type: string
|
1979
1985
|
- type: object
|
1980
1986
|
additionalProperties:
|
1981
1987
|
type: string
|
1988
|
+
- type: string
|
1982
1989
|
interpolation_context:
|
1983
1990
|
- next_page_token
|
1984
1991
|
- stream_interval
|
@@ -1993,9 +2000,9 @@ definitions:
|
|
1993
2000
|
title: Request Body JSON Payload
|
1994
2001
|
description: Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.
|
1995
2002
|
anyOf:
|
1996
|
-
- type: string
|
1997
2003
|
- type: object
|
1998
2004
|
additionalProperties: true
|
2005
|
+
- type: string
|
1999
2006
|
interpolation_context:
|
2000
2007
|
- next_page_token
|
2001
2008
|
- stream_interval
|
@@ -2012,10 +2019,12 @@ definitions:
|
|
2012
2019
|
title: Request Headers
|
2013
2020
|
description: Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.
|
2014
2021
|
anyOf:
|
2015
|
-
- type: string
|
2016
2022
|
- type: object
|
2023
|
+
title: Key/Value Pairs
|
2017
2024
|
additionalProperties:
|
2018
2025
|
type: string
|
2026
|
+
- type: string
|
2027
|
+
title: Interpolated Value
|
2019
2028
|
interpolation_context:
|
2020
2029
|
- next_page_token
|
2021
2030
|
- stream_interval
|
@@ -2028,12 +2037,14 @@ definitions:
|
|
2028
2037
|
title: Query Parameters
|
2029
2038
|
description: Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.
|
2030
2039
|
anyOf:
|
2031
|
-
- type: string
|
2032
2040
|
- type: object
|
2041
|
+
title: Key/Value Pairs
|
2033
2042
|
additionalProperties:
|
2034
2043
|
anyOf:
|
2035
2044
|
- type: string
|
2036
|
-
- $ref
|
2045
|
+
- $ref: "#/definitions/QueryProperties"
|
2046
|
+
- type: string
|
2047
|
+
title: Interpolated Value
|
2037
2048
|
interpolation_context:
|
2038
2049
|
- next_page_token
|
2039
2050
|
- stream_interval
|
@@ -2044,6 +2055,13 @@ definitions:
|
|
2044
2055
|
- query: 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"'
|
2045
2056
|
- searchIn: "{{ ','.join(config.get('search_in', [])) }}"
|
2046
2057
|
- sort_by[asc]: updated_at
|
2058
|
+
error_handler:
|
2059
|
+
title: Error Handler
|
2060
|
+
description: Error handler component that defines how to handle errors.
|
2061
|
+
anyOf:
|
2062
|
+
- "$ref": "#/definitions/DefaultErrorHandler"
|
2063
|
+
- "$ref": "#/definitions/CompositeErrorHandler"
|
2064
|
+
- "$ref": "#/definitions/CustomErrorHandler"
|
2047
2065
|
use_cache:
|
2048
2066
|
title: Use Cache
|
2049
2067
|
description: Enables stream requests caching. This field is automatically set by the CDK.
|
@@ -2258,6 +2276,7 @@ definitions:
|
|
2258
2276
|
title: Schema
|
2259
2277
|
description: Describes a streams' schema. Refer to the <a href="https://docs.airbyte.com/understanding-airbyte/supported-data-types/">Data Types documentation</a> for more details on which types are valid.
|
2260
2278
|
type: object
|
2279
|
+
additionalProperties: true
|
2261
2280
|
JsonFileSchemaLoader:
|
2262
2281
|
title: Json File Schema Loader
|
2263
2282
|
description: Loads the schema from a json file.
|
@@ -2939,7 +2958,9 @@ definitions:
|
|
2939
2958
|
description: The number of records to include in each pages.
|
2940
2959
|
anyOf:
|
2941
2960
|
- type: integer
|
2961
|
+
title: Number of Records
|
2942
2962
|
- type: string
|
2963
|
+
title: Interpolated Value
|
2943
2964
|
interpolation_context:
|
2944
2965
|
- config
|
2945
2966
|
- response
|
@@ -2971,7 +2992,9 @@ definitions:
|
|
2971
2992
|
- config
|
2972
2993
|
anyOf:
|
2973
2994
|
- type: integer
|
2995
|
+
title: Number of Records
|
2974
2996
|
- type: string
|
2997
|
+
title: Interpolated Value
|
2975
2998
|
examples:
|
2976
2999
|
- 100
|
2977
3000
|
- "100"
|
@@ -3065,10 +3088,13 @@ definitions:
|
|
3065
3088
|
description: The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.
|
3066
3089
|
anyOf:
|
3067
3090
|
- type: string
|
3091
|
+
title: Single Key
|
3068
3092
|
- type: array
|
3093
|
+
title: Composite Key
|
3069
3094
|
items:
|
3070
3095
|
type: string
|
3071
3096
|
- type: array
|
3097
|
+
title: Composite Key of Nested Fields
|
3072
3098
|
items:
|
3073
3099
|
type: array
|
3074
3100
|
items:
|
@@ -3178,6 +3204,7 @@ definitions:
|
|
3178
3204
|
type: string
|
3179
3205
|
enum: [RecordFilter]
|
3180
3206
|
condition:
|
3207
|
+
title: Condition
|
3181
3208
|
description: The predicate to filter a record. Records will be removed if evaluated to False.
|
3182
3209
|
type: string
|
3183
3210
|
default: ""
|
@@ -3207,25 +3234,24 @@ definitions:
|
|
3207
3234
|
enum: [RecordSelector]
|
3208
3235
|
extractor:
|
3209
3236
|
anyOf:
|
3210
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
3211
3237
|
- "$ref": "#/definitions/DpathExtractor"
|
3238
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
3212
3239
|
record_filter:
|
3213
3240
|
title: Record Filter
|
3214
3241
|
description: Responsible for filtering records to be emitted by the Source.
|
3215
3242
|
anyOf:
|
3216
|
-
- "$ref": "#/definitions/CustomRecordFilter"
|
3217
3243
|
- "$ref": "#/definitions/RecordFilter"
|
3244
|
+
- "$ref": "#/definitions/CustomRecordFilter"
|
3218
3245
|
schema_normalization:
|
3219
3246
|
title: Schema Normalization
|
3220
3247
|
description: Responsible for normalization according to the schema.
|
3221
3248
|
anyOf:
|
3222
3249
|
- "$ref": "#/definitions/SchemaNormalization"
|
3223
3250
|
- "$ref": "#/definitions/CustomSchemaNormalization"
|
3224
|
-
default: None
|
3225
3251
|
transform_before_filtering:
|
3252
|
+
title: Transform Before Filtering
|
3226
3253
|
description: If true, transformation will be applied before record filtering.
|
3227
3254
|
type: boolean
|
3228
|
-
default: false
|
3229
3255
|
$parameters:
|
3230
3256
|
type: object
|
3231
3257
|
additionalProperties: true
|
@@ -3234,11 +3260,11 @@ definitions:
|
|
3234
3260
|
description: Responsible for normalization according to the schema.
|
3235
3261
|
type: string
|
3236
3262
|
enum:
|
3237
|
-
- None
|
3238
3263
|
- Default
|
3239
|
-
examples:
|
3240
3264
|
- None
|
3265
|
+
examples:
|
3241
3266
|
- Default
|
3267
|
+
- None
|
3242
3268
|
RemoveFields:
|
3243
3269
|
title: Remove Fields
|
3244
3270
|
description: A transformation which removes fields from a record. The fields removed are designated using FieldPointers. During transformation, if a field or any of its parents does not exist in the record, no error is thrown.
|
@@ -3394,6 +3420,7 @@ definitions:
|
|
3394
3420
|
type: object
|
3395
3421
|
additionalProperties: true
|
3396
3422
|
StateDelegatingStream:
|
3423
|
+
title: State Delegating Stream
|
3397
3424
|
description: (This component is experimental. Use at your own risk.) Orchestrate the retriever's usage based on the state value.
|
3398
3425
|
type: object
|
3399
3426
|
required:
|
@@ -3413,17 +3440,18 @@ definitions:
|
|
3413
3440
|
example:
|
3414
3441
|
- "Users"
|
3415
3442
|
full_refresh_stream:
|
3416
|
-
title:
|
3443
|
+
title: Full Refresh Stream
|
3417
3444
|
description: Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.
|
3418
3445
|
"$ref": "#/definitions/DeclarativeStream"
|
3419
3446
|
incremental_stream:
|
3420
|
-
title:
|
3447
|
+
title: Incremental Stream
|
3421
3448
|
description: Component used to coordinate how records are extracted across stream slices and request pages when the state provided.
|
3422
3449
|
"$ref": "#/definitions/DeclarativeStream"
|
3423
3450
|
$parameters:
|
3424
3451
|
type: object
|
3425
3452
|
additionalProperties: true
|
3426
3453
|
SimpleRetriever:
|
3454
|
+
title: Synchronous Retriever
|
3427
3455
|
description: Retrieves records by synchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router.
|
3428
3456
|
type: object
|
3429
3457
|
required:
|
@@ -3434,14 +3462,26 @@ definitions:
|
|
3434
3462
|
type:
|
3435
3463
|
type: string
|
3436
3464
|
enum: [SimpleRetriever]
|
3437
|
-
record_selector:
|
3438
|
-
description: Component that describes how to extract records from a HTTP response.
|
3439
|
-
"$ref": "#/definitions/RecordSelector"
|
3440
3465
|
requester:
|
3441
3466
|
description: Requester component that describes how to prepare HTTP requests to send to the source API.
|
3442
3467
|
anyOf:
|
3443
|
-
- "$ref": "#/definitions/CustomRequester"
|
3444
3468
|
- "$ref": "#/definitions/HttpRequester"
|
3469
|
+
- "$ref": "#/definitions/CustomRequester"
|
3470
|
+
decoder:
|
3471
|
+
title: Decoder
|
3472
|
+
description: Component decoding the response so records can be extracted.
|
3473
|
+
anyOf:
|
3474
|
+
- "$ref": "#/definitions/CsvDecoder"
|
3475
|
+
- "$ref": "#/definitions/GzipDecoder"
|
3476
|
+
- "$ref": "#/definitions/JsonDecoder"
|
3477
|
+
- "$ref": "#/definitions/JsonlDecoder"
|
3478
|
+
- "$ref": "#/definitions/IterableDecoder"
|
3479
|
+
- "$ref": "#/definitions/XmlDecoder"
|
3480
|
+
- "$ref": "#/definitions/ZipfileDecoder"
|
3481
|
+
- "$ref": "#/definitions/CustomDecoder"
|
3482
|
+
record_selector:
|
3483
|
+
description: Component that describes how to extract records from a HTTP response.
|
3484
|
+
"$ref": "#/definitions/RecordSelector"
|
3445
3485
|
paginator:
|
3446
3486
|
description: Paginator component that describes how to navigate through the API's pages.
|
3447
3487
|
anyOf:
|
@@ -3456,29 +3496,17 @@ definitions:
|
|
3456
3496
|
description: PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.
|
3457
3497
|
default: []
|
3458
3498
|
anyOf:
|
3459
|
-
- "$ref": "#/definitions/CustomPartitionRouter"
|
3460
3499
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3461
3500
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3462
3501
|
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3502
|
+
- "$ref": "#/definitions/CustomPartitionRouter"
|
3463
3503
|
- type: array
|
3464
3504
|
items:
|
3465
3505
|
anyOf:
|
3466
|
-
- "$ref": "#/definitions/CustomPartitionRouter"
|
3467
3506
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3468
3507
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3469
3508
|
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3470
|
-
|
3471
|
-
title: Decoder
|
3472
|
-
description: Component decoding the response so records can be extracted.
|
3473
|
-
anyOf:
|
3474
|
-
- "$ref": "#/definitions/CustomDecoder"
|
3475
|
-
- "$ref": "#/definitions/CsvDecoder"
|
3476
|
-
- "$ref": "#/definitions/GzipDecoder"
|
3477
|
-
- "$ref": "#/definitions/JsonDecoder"
|
3478
|
-
- "$ref": "#/definitions/JsonlDecoder"
|
3479
|
-
- "$ref": "#/definitions/IterableDecoder"
|
3480
|
-
- "$ref": "#/definitions/XmlDecoder"
|
3481
|
-
- "$ref": "#/definitions/ZipfileDecoder"
|
3509
|
+
- "$ref": "#/definitions/CustomPartitionRouter"
|
3482
3510
|
$parameters:
|
3483
3511
|
type: object
|
3484
3512
|
additionalProperties: true
|
@@ -3544,6 +3572,7 @@ definitions:
|
|
3544
3572
|
items:
|
3545
3573
|
type: string
|
3546
3574
|
AsyncRetriever:
|
3575
|
+
title: Asynchronous Retriever
|
3547
3576
|
description: "Retrieves records by Asynchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router."
|
3548
3577
|
type: object
|
3549
3578
|
required:
|
@@ -3569,29 +3598,29 @@ definitions:
|
|
3569
3598
|
status_extractor:
|
3570
3599
|
description: Responsible for fetching the actual status of the async job.
|
3571
3600
|
anyOf:
|
3572
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
3573
3601
|
- "$ref": "#/definitions/DpathExtractor"
|
3602
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
3574
3603
|
download_target_extractor:
|
3575
3604
|
description: Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.
|
3576
3605
|
anyOf:
|
3577
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
3578
3606
|
- "$ref": "#/definitions/DpathExtractor"
|
3607
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
3579
3608
|
download_extractor:
|
3580
3609
|
description: Responsible for fetching the records from provided urls.
|
3581
3610
|
anyOf:
|
3582
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
3583
3611
|
- "$ref": "#/definitions/DpathExtractor"
|
3612
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
3584
3613
|
- "$ref": "#/definitions/ResponseToFileExtractor"
|
3585
3614
|
creation_requester:
|
3586
3615
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.
|
3587
3616
|
anyOf:
|
3588
|
-
- "$ref": "#/definitions/CustomRequester"
|
3589
3617
|
- "$ref": "#/definitions/HttpRequester"
|
3618
|
+
- "$ref": "#/definitions/CustomRequester"
|
3590
3619
|
polling_requester:
|
3591
3620
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.
|
3592
3621
|
anyOf:
|
3593
|
-
- "$ref": "#/definitions/CustomRequester"
|
3594
3622
|
- "$ref": "#/definitions/HttpRequester"
|
3623
|
+
- "$ref": "#/definitions/CustomRequester"
|
3595
3624
|
polling_job_timeout:
|
3596
3625
|
description: The time in minutes after which the single Async Job should be considered as Timed Out.
|
3597
3626
|
anyOf:
|
@@ -3602,13 +3631,13 @@ definitions:
|
|
3602
3631
|
download_target_requester:
|
3603
3632
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.
|
3604
3633
|
anyOf:
|
3605
|
-
- "$ref": "#/definitions/CustomRequester"
|
3606
3634
|
- "$ref": "#/definitions/HttpRequester"
|
3635
|
+
- "$ref": "#/definitions/CustomRequester"
|
3607
3636
|
download_requester:
|
3608
3637
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.
|
3609
3638
|
anyOf:
|
3610
|
-
- "$ref": "#/definitions/CustomRequester"
|
3611
3639
|
- "$ref": "#/definitions/HttpRequester"
|
3640
|
+
- "$ref": "#/definitions/CustomRequester"
|
3612
3641
|
download_paginator:
|
3613
3642
|
description: Paginator component that describes how to navigate through the API's pages during download.
|
3614
3643
|
anyOf:
|
@@ -3617,34 +3646,33 @@ definitions:
|
|
3617
3646
|
abort_requester:
|
3618
3647
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to abort a job once it is timed out from the source's perspective.
|
3619
3648
|
anyOf:
|
3620
|
-
- "$ref": "#/definitions/CustomRequester"
|
3621
3649
|
- "$ref": "#/definitions/HttpRequester"
|
3650
|
+
- "$ref": "#/definitions/CustomRequester"
|
3622
3651
|
delete_requester:
|
3623
3652
|
description: Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are extracted.
|
3624
3653
|
anyOf:
|
3625
|
-
- "$ref": "#/definitions/CustomRequester"
|
3626
3654
|
- "$ref": "#/definitions/HttpRequester"
|
3655
|
+
- "$ref": "#/definitions/CustomRequester"
|
3627
3656
|
partition_router:
|
3628
3657
|
title: Partition Router
|
3629
3658
|
description: PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.
|
3630
3659
|
default: []
|
3631
3660
|
anyOf:
|
3632
|
-
- "$ref": "#/definitions/CustomPartitionRouter"
|
3633
3661
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3634
3662
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3635
3663
|
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3664
|
+
- "$ref": "#/definitions/CustomPartitionRouter"
|
3636
3665
|
- type: array
|
3637
3666
|
items:
|
3638
3667
|
anyOf:
|
3639
|
-
- "$ref": "#/definitions/CustomPartitionRouter"
|
3640
3668
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3641
3669
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3642
3670
|
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3671
|
+
- "$ref": "#/definitions/CustomPartitionRouter"
|
3643
3672
|
decoder:
|
3644
3673
|
title: Decoder
|
3645
3674
|
description: Component decoding the response so records can be extracted.
|
3646
3675
|
anyOf:
|
3647
|
-
- "$ref": "#/definitions/CustomDecoder"
|
3648
3676
|
- "$ref": "#/definitions/CsvDecoder"
|
3649
3677
|
- "$ref": "#/definitions/GzipDecoder"
|
3650
3678
|
- "$ref": "#/definitions/JsonDecoder"
|
@@ -3652,11 +3680,11 @@ definitions:
|
|
3652
3680
|
- "$ref": "#/definitions/IterableDecoder"
|
3653
3681
|
- "$ref": "#/definitions/XmlDecoder"
|
3654
3682
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3683
|
+
- "$ref": "#/definitions/CustomDecoder"
|
3655
3684
|
download_decoder:
|
3656
3685
|
title: Download Decoder
|
3657
3686
|
description: Component decoding the download response so records can be extracted.
|
3658
3687
|
anyOf:
|
3659
|
-
- "$ref": "#/definitions/CustomDecoder"
|
3660
3688
|
- "$ref": "#/definitions/CsvDecoder"
|
3661
3689
|
- "$ref": "#/definitions/GzipDecoder"
|
3662
3690
|
- "$ref": "#/definitions/JsonDecoder"
|
@@ -3664,6 +3692,7 @@ definitions:
|
|
3664
3692
|
- "$ref": "#/definitions/IterableDecoder"
|
3665
3693
|
- "$ref": "#/definitions/XmlDecoder"
|
3666
3694
|
- "$ref": "#/definitions/ZipfileDecoder"
|
3695
|
+
- "$ref": "#/definitions/CustomDecoder"
|
3667
3696
|
$parameters:
|
3668
3697
|
type: object
|
3669
3698
|
additionalProperties: true
|