airbyte-cdk 6.12.0rc1__py3-none-any.whl → 6.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +73 -85
- airbyte_cdk/sources/declarative/decoders/__init__.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +65 -77
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +47 -30
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +9 -1
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +8 -31
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +50 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/streams/http/http_client.py +4 -2
- {airbyte_cdk-6.12.0rc1.dist-info → airbyte_cdk-6.12.1.dist-info}/METADATA +2 -1
- {airbyte_cdk-6.12.0rc1.dist-info → airbyte_cdk-6.12.1.dist-info}/RECORD +16 -16
- airbyte_cdk/sources/declarative/decoders/parsers/__init__.py +0 -7
- airbyte_cdk/sources/declarative/decoders/parsers/parsers.py +0 -49
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +0 -54
- {airbyte_cdk-6.12.0rc1.dist-info → airbyte_cdk-6.12.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.0rc1.dist-info → airbyte_cdk-6.12.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.12.0rc1.dist-info → airbyte_cdk-6.12.1.dist-info}/entry_points.txt +0 -0
@@ -1234,6 +1234,8 @@ definitions:
|
|
1234
1234
|
- "$ref": "#/definitions/CustomTransformation"
|
1235
1235
|
- "$ref": "#/definitions/RemoveFields"
|
1236
1236
|
- "$ref": "#/definitions/KeysToLower"
|
1237
|
+
- "$ref": "#/definitions/KeysToSnakeCase"
|
1238
|
+
- "$ref": "#/definitions/FlattenFields"
|
1237
1239
|
state_migrations:
|
1238
1240
|
title: State Migrations
|
1239
1241
|
description: Array of state migrations to be applied on the input state
|
@@ -1766,6 +1768,18 @@ definitions:
|
|
1766
1768
|
- "$ref": "#/definitions/AsyncRetriever"
|
1767
1769
|
- "$ref": "#/definitions/CustomRetriever"
|
1768
1770
|
- "$ref": "#/definitions/SimpleRetriever"
|
1771
|
+
schema_transformations:
|
1772
|
+
title: Schema Transformations
|
1773
|
+
description: A list of transformations to be applied to the schema.
|
1774
|
+
type: array
|
1775
|
+
items:
|
1776
|
+
anyOf:
|
1777
|
+
- "$ref": "#/definitions/AddFields"
|
1778
|
+
- "$ref": "#/definitions/CustomTransformation"
|
1779
|
+
- "$ref": "#/definitions/RemoveFields"
|
1780
|
+
- "$ref": "#/definitions/KeysToLower"
|
1781
|
+
- "$ref": "#/definitions/KeysToSnakeCase"
|
1782
|
+
- "$ref": "#/definitions/FlattenFields"
|
1769
1783
|
schema_type_identifier:
|
1770
1784
|
"$ref": "#/definitions/SchemaTypeIdentifier"
|
1771
1785
|
$parameters:
|
@@ -1838,6 +1852,32 @@ definitions:
|
|
1838
1852
|
$parameters:
|
1839
1853
|
type: object
|
1840
1854
|
additionalProperties: true
|
1855
|
+
KeysToSnakeCase:
|
1856
|
+
title: Key to Snake Case
|
1857
|
+
description: A transformation that renames all keys to snake case.
|
1858
|
+
type: object
|
1859
|
+
required:
|
1860
|
+
- type
|
1861
|
+
properties:
|
1862
|
+
type:
|
1863
|
+
type: string
|
1864
|
+
enum: [KeysToSnakeCase]
|
1865
|
+
$parameters:
|
1866
|
+
type: object
|
1867
|
+
additionalProperties: true
|
1868
|
+
FlattenFields:
|
1869
|
+
title: Flatten Fields
|
1870
|
+
description: A transformation that flatten record to single level format.
|
1871
|
+
type: object
|
1872
|
+
required:
|
1873
|
+
- type
|
1874
|
+
properties:
|
1875
|
+
type:
|
1876
|
+
type: string
|
1877
|
+
enum: [FlattenFields]
|
1878
|
+
$parameters:
|
1879
|
+
type: object
|
1880
|
+
additionalProperties: true
|
1841
1881
|
IterableDecoder:
|
1842
1882
|
title: Iterable Decoder
|
1843
1883
|
description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
|
@@ -1897,56 +1937,6 @@ definitions:
|
|
1897
1937
|
$parameters:
|
1898
1938
|
type: object
|
1899
1939
|
additionalProperties: true
|
1900
|
-
ZipfileDecoder:
|
1901
|
-
title: Zipfile Decoder
|
1902
|
-
description: Decoder for response data that is returned as zipfile(s).
|
1903
|
-
type: object
|
1904
|
-
additionalProperties: true
|
1905
|
-
required:
|
1906
|
-
- type
|
1907
|
-
properties:
|
1908
|
-
type:
|
1909
|
-
type: string
|
1910
|
-
enum: [ZipfileDecoder]
|
1911
|
-
parser:
|
1912
|
-
title: Parser
|
1913
|
-
description: Parser to parse the decompressed data from the zipfile(s).
|
1914
|
-
anyOf:
|
1915
|
-
- "$ref": "#/definitions/JsonParser"
|
1916
|
-
- "$ref": "#/definitions/CustomParser"
|
1917
|
-
JsonParser:
|
1918
|
-
title: JsonParser
|
1919
|
-
description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
|
1920
|
-
type: object
|
1921
|
-
additionalProperties: true
|
1922
|
-
required:
|
1923
|
-
- type
|
1924
|
-
properties:
|
1925
|
-
type:
|
1926
|
-
type: string
|
1927
|
-
enum: [JsonParser]
|
1928
|
-
CustomParser:
|
1929
|
-
title: Custom Parser
|
1930
|
-
description: Use this to implement custom parser logic.
|
1931
|
-
type: object
|
1932
|
-
additionalProperties: true
|
1933
|
-
required:
|
1934
|
-
- type
|
1935
|
-
- class_name
|
1936
|
-
properties:
|
1937
|
-
type:
|
1938
|
-
type: string
|
1939
|
-
enum: [CustomParser]
|
1940
|
-
class_name:
|
1941
|
-
title: Class Name
|
1942
|
-
description: Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Parser. The format is `source_<name>.<package>.<class_name>`.
|
1943
|
-
type: string
|
1944
|
-
additionalProperties: true
|
1945
|
-
examples:
|
1946
|
-
- "source_rivendell.components.ElvishParser"
|
1947
|
-
$parameters:
|
1948
|
-
type: object
|
1949
|
-
additionalProperties: true
|
1950
1940
|
ListPartitionRouter:
|
1951
1941
|
title: List Partition Router
|
1952
1942
|
description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
|
@@ -2180,65 +2170,63 @@ definitions:
|
|
2180
2170
|
- extract_output
|
2181
2171
|
properties:
|
2182
2172
|
consent_url:
|
2183
|
-
title:
|
2173
|
+
title: Consent URL
|
2184
2174
|
type: string
|
2185
2175
|
description: |-
|
2186
2176
|
The DeclarativeOAuth Specific string URL string template to initiate the authentication.
|
2187
2177
|
The placeholders are replaced during the processing to provide neccessary values.
|
2188
2178
|
examples:
|
2189
|
-
-
|
2190
|
-
-
|
2179
|
+
- https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}
|
2180
|
+
- https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}
|
2191
2181
|
scope:
|
2192
|
-
title:
|
2182
|
+
title: Scopes
|
2193
2183
|
type: string
|
2194
2184
|
description: |-
|
2195
2185
|
The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.
|
2196
2186
|
examples:
|
2197
|
-
-
|
2187
|
+
- user:read user:read_orders workspaces:read
|
2198
2188
|
access_token_url:
|
2199
|
-
title:
|
2189
|
+
title: Access Token URL
|
2200
2190
|
type: string
|
2201
2191
|
description: |-
|
2202
2192
|
The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.
|
2203
2193
|
The placeholders are replaced during the processing to provide neccessary values.
|
2204
2194
|
examples:
|
2205
|
-
-
|
2195
|
+
- https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
|
2206
2196
|
access_token_headers:
|
2207
|
-
title:
|
2197
|
+
title: Access Token Headers
|
2208
2198
|
type: object
|
2209
2199
|
additionalProperties: true
|
2210
2200
|
description: |-
|
2211
2201
|
The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
|
2212
2202
|
examples:
|
2213
|
-
-
|
2214
|
-
{
|
2215
|
-
|
2216
|
-
}
|
2203
|
+
- {
|
2204
|
+
"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}",
|
2205
|
+
}
|
2217
2206
|
access_token_params:
|
2218
|
-
title:
|
2207
|
+
title: Access Token Query Params (Json Encoded)
|
2219
2208
|
type: object
|
2220
2209
|
additionalProperties: true
|
2221
2210
|
description: |-
|
2222
2211
|
The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
|
2223
2212
|
When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.
|
2224
2213
|
examples:
|
2225
|
-
-
|
2226
|
-
{
|
2227
|
-
|
2228
|
-
|
2229
|
-
|
2230
|
-
}
|
2214
|
+
- {
|
2215
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
2216
|
+
"{client_id_key}": "{{client_id_key}}",
|
2217
|
+
"{client_secret_key}": "{{client_secret_key}}",
|
2218
|
+
}
|
2231
2219
|
extract_output:
|
2232
|
-
title:
|
2220
|
+
title: Extract Output
|
2233
2221
|
type: array
|
2234
2222
|
items:
|
2235
2223
|
type: string
|
2236
2224
|
description: |-
|
2237
2225
|
The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
|
2238
2226
|
examples:
|
2239
|
-
-
|
2227
|
+
- ["access_token", "refresh_token", "other_field"]
|
2240
2228
|
state:
|
2241
|
-
title:
|
2229
|
+
title: Configurable State Query Param
|
2242
2230
|
type: object
|
2243
2231
|
additionalProperties: true
|
2244
2232
|
required:
|
@@ -2253,49 +2241,49 @@ definitions:
|
|
2253
2241
|
max:
|
2254
2242
|
type: integer
|
2255
2243
|
examples:
|
2256
|
-
-
|
2244
|
+
- { "min": 7, "max": 128 }
|
2257
2245
|
client_id_key:
|
2258
|
-
title:
|
2246
|
+
title: Client ID Key Override
|
2259
2247
|
type: string
|
2260
2248
|
description: |-
|
2261
2249
|
The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.
|
2262
2250
|
examples:
|
2263
|
-
-
|
2251
|
+
- "my_custom_client_id_key_name"
|
2264
2252
|
client_secret_key:
|
2265
|
-
title:
|
2253
|
+
title: Client Secret Key Override
|
2266
2254
|
type: string
|
2267
2255
|
description: |-
|
2268
2256
|
The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.
|
2269
2257
|
examples:
|
2270
|
-
-
|
2258
|
+
- "my_custom_client_secret_key_name"
|
2271
2259
|
scope_key:
|
2272
|
-
title:
|
2260
|
+
title: Scopes Key Override
|
2273
2261
|
type: string
|
2274
2262
|
description: |-
|
2275
2263
|
The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.
|
2276
2264
|
examples:
|
2277
|
-
-
|
2265
|
+
- "my_custom_scope_key_key_name"
|
2278
2266
|
state_key:
|
2279
|
-
title:
|
2267
|
+
title: State Key Override
|
2280
2268
|
type: string
|
2281
2269
|
description: |-
|
2282
2270
|
The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
|
2283
2271
|
examples:
|
2284
|
-
-
|
2272
|
+
- "my_custom_state_key_key_name"
|
2285
2273
|
auth_code_key:
|
2286
|
-
title:
|
2274
|
+
title: Auth Code Key Override
|
2287
2275
|
type: string
|
2288
2276
|
description: |-
|
2289
2277
|
The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
|
2290
2278
|
examples:
|
2291
|
-
-
|
2279
|
+
- "my_custom_auth_code_key_name"
|
2292
2280
|
redirect_uri_key:
|
2293
|
-
title:
|
2281
|
+
title: Redirect URI Key Override
|
2294
2282
|
type: string
|
2295
2283
|
description: |-
|
2296
2284
|
The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.
|
2297
2285
|
examples:
|
2298
|
-
-
|
2286
|
+
- "my_custom_redirect_uri_key_name"
|
2299
2287
|
complete_oauth_output_specification:
|
2300
2288
|
title: "OAuth output specification"
|
2301
2289
|
description: |-
|
@@ -7,6 +7,5 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, J
|
|
7
7
|
from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
|
8
8
|
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import PaginationDecoderDecorator
|
9
9
|
from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
|
10
|
-
from airbyte_cdk.sources.declarative.decoders.zipfile_decoder import ZipfileDecoder
|
11
10
|
|
12
|
-
__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", "IterableDecoder", "GzipJsonDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"
|
11
|
+
__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", "IterableDecoder", "GzipJsonDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"]
|
@@ -710,6 +710,16 @@ class KeysToLower(BaseModel):
|
|
710
710
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
711
711
|
|
712
712
|
|
713
|
+
class KeysToSnakeCase(BaseModel):
|
714
|
+
type: Literal["KeysToSnakeCase"]
|
715
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
716
|
+
|
717
|
+
|
718
|
+
class FlattenFields(BaseModel):
|
719
|
+
type: Literal["FlattenFields"]
|
720
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
721
|
+
|
722
|
+
|
713
723
|
class IterableDecoder(BaseModel):
|
714
724
|
type: Literal["IterableDecoder"]
|
715
725
|
|
@@ -741,27 +751,6 @@ class GzipJsonDecoder(BaseModel):
|
|
741
751
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
742
752
|
|
743
753
|
|
744
|
-
class JsonParser(BaseModel):
|
745
|
-
class Config:
|
746
|
-
extra = Extra.allow
|
747
|
-
|
748
|
-
type: Literal["JsonParser"]
|
749
|
-
|
750
|
-
|
751
|
-
class CustomParser(BaseModel):
|
752
|
-
class Config:
|
753
|
-
extra = Extra.allow
|
754
|
-
|
755
|
-
type: Literal["CustomParser"]
|
756
|
-
class_name: str = Field(
|
757
|
-
...,
|
758
|
-
description="Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Parser. The format is `source_<name>.<package>.<class_name>`.",
|
759
|
-
examples=["source_rivendell.components.ElvishParser"],
|
760
|
-
title="Class Name",
|
761
|
-
)
|
762
|
-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
763
|
-
|
764
|
-
|
765
754
|
class MinMaxDatetime(BaseModel):
|
766
755
|
type: Literal["MinMaxDatetime"]
|
767
756
|
datetime: str = Field(
|
@@ -816,104 +805,90 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
816
805
|
...,
|
817
806
|
description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
|
818
807
|
examples=[
|
819
|
-
{
|
820
|
-
|
821
|
-
},
|
822
|
-
{
|
823
|
-
"consent_url": "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}"
|
824
|
-
},
|
808
|
+
"https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
|
809
|
+
"https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}",
|
825
810
|
],
|
826
|
-
title="
|
811
|
+
title="Consent URL",
|
827
812
|
)
|
828
813
|
scope: Optional[str] = Field(
|
829
814
|
None,
|
830
815
|
description="The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.",
|
831
|
-
examples=[
|
832
|
-
title="
|
816
|
+
examples=["user:read user:read_orders workspaces:read"],
|
817
|
+
title="Scopes",
|
833
818
|
)
|
834
819
|
access_token_url: str = Field(
|
835
820
|
...,
|
836
821
|
description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
|
837
822
|
examples=[
|
838
|
-
{
|
839
|
-
"access_token_url": "https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
|
840
|
-
}
|
823
|
+
"https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
|
841
824
|
],
|
842
|
-
title="
|
825
|
+
title="Access Token URL",
|
843
826
|
)
|
844
827
|
access_token_headers: Optional[Dict[str, Any]] = Field(
|
845
828
|
None,
|
846
829
|
description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
|
847
|
-
examples=[
|
848
|
-
|
849
|
-
"access_token_headers": {
|
850
|
-
"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"
|
851
|
-
}
|
852
|
-
}
|
853
|
-
],
|
854
|
-
title="(Optional) DeclarativeOAuth Access Token Headers",
|
830
|
+
examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}],
|
831
|
+
title="Access Token Headers",
|
855
832
|
)
|
856
833
|
access_token_params: Optional[Dict[str, Any]] = Field(
|
857
834
|
None,
|
858
835
|
description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
|
859
836
|
examples=[
|
860
837
|
{
|
861
|
-
"
|
862
|
-
|
863
|
-
|
864
|
-
"{client_secret_key}": "{{client_secret_key}}",
|
865
|
-
}
|
838
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
839
|
+
"{client_id_key}": "{{client_id_key}}",
|
840
|
+
"{client_secret_key}": "{{client_secret_key}}",
|
866
841
|
}
|
867
842
|
],
|
868
|
-
title="
|
843
|
+
title="Access Token Query Params (Json Encoded)",
|
869
844
|
)
|
870
845
|
extract_output: List[str] = Field(
|
871
846
|
...,
|
872
847
|
description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
|
873
|
-
examples=[
|
874
|
-
title="
|
848
|
+
examples=[["access_token", "refresh_token", "other_field"]],
|
849
|
+
title="Extract Output",
|
875
850
|
)
|
876
851
|
state: Optional[State] = Field(
|
877
852
|
None,
|
878
853
|
description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.",
|
879
|
-
examples=[{"
|
880
|
-
title="
|
854
|
+
examples=[{"min": 7, "max": 128}],
|
855
|
+
title="Configurable State Query Param",
|
881
856
|
)
|
882
857
|
client_id_key: Optional[str] = Field(
|
883
858
|
None,
|
884
859
|
description="The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.",
|
885
|
-
examples=[
|
886
|
-
title="
|
860
|
+
examples=["my_custom_client_id_key_name"],
|
861
|
+
title="Client ID Key Override",
|
887
862
|
)
|
888
863
|
client_secret_key: Optional[str] = Field(
|
889
864
|
None,
|
890
865
|
description="The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.",
|
891
|
-
examples=[
|
892
|
-
title="
|
866
|
+
examples=["my_custom_client_secret_key_name"],
|
867
|
+
title="Client Secret Key Override",
|
893
868
|
)
|
894
869
|
scope_key: Optional[str] = Field(
|
895
870
|
None,
|
896
871
|
description="The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.",
|
897
|
-
examples=[
|
898
|
-
title="
|
872
|
+
examples=["my_custom_scope_key_key_name"],
|
873
|
+
title="Scopes Key Override",
|
899
874
|
)
|
900
875
|
state_key: Optional[str] = Field(
|
901
876
|
None,
|
902
877
|
description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.",
|
903
|
-
examples=[
|
904
|
-
title="
|
878
|
+
examples=["my_custom_state_key_key_name"],
|
879
|
+
title="State Key Override",
|
905
880
|
)
|
906
881
|
auth_code_key: Optional[str] = Field(
|
907
882
|
None,
|
908
883
|
description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.",
|
909
|
-
examples=[
|
910
|
-
title="
|
884
|
+
examples=["my_custom_auth_code_key_name"],
|
885
|
+
title="Auth Code Key Override",
|
911
886
|
)
|
912
887
|
redirect_uri_key: Optional[str] = Field(
|
913
888
|
None,
|
914
889
|
description="The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.",
|
915
|
-
examples=[
|
916
|
-
title="
|
890
|
+
examples=["my_custom_redirect_uri_key_name"],
|
891
|
+
title="Redirect URI Key Override",
|
917
892
|
)
|
918
893
|
|
919
894
|
|
@@ -1489,18 +1464,6 @@ class SessionTokenRequestApiKeyAuthenticator(BaseModel):
|
|
1489
1464
|
)
|
1490
1465
|
|
1491
1466
|
|
1492
|
-
class ZipfileDecoder(BaseModel):
|
1493
|
-
class Config:
|
1494
|
-
extra = Extra.allow
|
1495
|
-
|
1496
|
-
type: Literal["ZipfileDecoder"]
|
1497
|
-
parser: Optional[Union[JsonParser, CustomParser]] = Field(
|
1498
|
-
None,
|
1499
|
-
description="Parser to parse the decompressed data from the zipfile(s).",
|
1500
|
-
title="Parser",
|
1501
|
-
)
|
1502
|
-
|
1503
|
-
|
1504
1467
|
class ListPartitionRouter(BaseModel):
|
1505
1468
|
type: Literal["ListPartitionRouter"]
|
1506
1469
|
cursor_field: str = Field(
|
@@ -1701,7 +1664,16 @@ class DeclarativeStream(BaseModel):
|
|
1701
1664
|
title="Schema Loader",
|
1702
1665
|
)
|
1703
1666
|
transformations: Optional[
|
1704
|
-
List[
|
1667
|
+
List[
|
1668
|
+
Union[
|
1669
|
+
AddFields,
|
1670
|
+
CustomTransformation,
|
1671
|
+
RemoveFields,
|
1672
|
+
KeysToLower,
|
1673
|
+
KeysToSnakeCase,
|
1674
|
+
FlattenFields,
|
1675
|
+
]
|
1676
|
+
]
|
1705
1677
|
] = Field(
|
1706
1678
|
None,
|
1707
1679
|
description="A list of transformations to be applied to each output record.",
|
@@ -1865,6 +1837,22 @@ class DynamicSchemaLoader(BaseModel):
|
|
1865
1837
|
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
1866
1838
|
title="Retriever",
|
1867
1839
|
)
|
1840
|
+
schema_transformations: Optional[
|
1841
|
+
List[
|
1842
|
+
Union[
|
1843
|
+
AddFields,
|
1844
|
+
CustomTransformation,
|
1845
|
+
RemoveFields,
|
1846
|
+
KeysToLower,
|
1847
|
+
KeysToSnakeCase,
|
1848
|
+
FlattenFields,
|
1849
|
+
]
|
1850
|
+
]
|
1851
|
+
] = Field(
|
1852
|
+
None,
|
1853
|
+
description="A list of transformations to be applied to the schema.",
|
1854
|
+
title="Schema Transformations",
|
1855
|
+
)
|
1868
1856
|
schema_type_identifier: SchemaTypeIdentifier
|
1869
1857
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1870
1858
|
|
@@ -66,9 +66,7 @@ from airbyte_cdk.sources.declarative.decoders import (
|
|
66
66
|
JsonlDecoder,
|
67
67
|
PaginationDecoderDecorator,
|
68
68
|
XmlDecoder,
|
69
|
-
ZipfileDecoder,
|
70
69
|
)
|
71
|
-
from airbyte_cdk.sources.declarative.decoders.parsers import JsonParser
|
72
70
|
from airbyte_cdk.sources.declarative.extractors import (
|
73
71
|
DpathExtractor,
|
74
72
|
RecordFilter,
|
@@ -199,6 +197,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
199
197
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
200
198
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
201
199
|
)
|
200
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
201
|
+
FlattenFields as FlattenFieldsModel,
|
202
|
+
)
|
202
203
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
203
204
|
GzipJsonDecoder as GzipJsonDecoderModel,
|
204
205
|
)
|
@@ -226,9 +227,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
226
227
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
227
228
|
JsonlDecoder as JsonlDecoderModel,
|
228
229
|
)
|
229
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
230
|
-
JsonParser as JsonParserModel,
|
231
|
-
)
|
232
230
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
233
231
|
JwtAuthenticator as JwtAuthenticatorModel,
|
234
232
|
)
|
@@ -241,6 +239,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
241
239
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
242
240
|
KeysToLower as KeysToLowerModel,
|
243
241
|
)
|
242
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
243
|
+
KeysToSnakeCase as KeysToSnakeCaseModel,
|
244
|
+
)
|
244
245
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
245
246
|
LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
|
246
247
|
)
|
@@ -321,9 +322,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
321
322
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
322
323
|
XmlDecoder as XmlDecoderModel,
|
323
324
|
)
|
324
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
325
|
-
ZipfileDecoder as ZipfileDecoderModel,
|
326
|
-
)
|
327
325
|
from airbyte_cdk.sources.declarative.partition_routers import (
|
328
326
|
CartesianProductStreamSlicer,
|
329
327
|
ListPartitionRouter,
|
@@ -331,6 +329,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
|
|
331
329
|
SinglePartitionRouter,
|
332
330
|
SubstreamPartitionRouter,
|
333
331
|
)
|
332
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
333
|
+
AsyncJobPartitionRouter,
|
334
|
+
)
|
334
335
|
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
335
336
|
ParentStreamConfig,
|
336
337
|
)
|
@@ -395,9 +396,15 @@ from airbyte_cdk.sources.declarative.transformations import (
|
|
395
396
|
RemoveFields,
|
396
397
|
)
|
397
398
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
|
399
|
+
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
|
400
|
+
FlattenFields,
|
401
|
+
)
|
398
402
|
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
|
399
403
|
KeysToLowerTransformation,
|
400
404
|
)
|
405
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
|
406
|
+
KeysToSnakeCaseTransformation,
|
407
|
+
)
|
401
408
|
from airbyte_cdk.sources.message import (
|
402
409
|
InMemoryMessageRepository,
|
403
410
|
LogAppenderMessageRepositoryDecorator,
|
@@ -478,9 +485,10 @@ class ModelToComponentFactory:
|
|
478
485
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
479
486
|
JsonDecoderModel: self.create_json_decoder,
|
480
487
|
JsonlDecoderModel: self.create_jsonl_decoder,
|
481
|
-
JsonParserModel: self.create_json_parser,
|
482
488
|
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
483
489
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
490
|
+
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
491
|
+
FlattenFieldsModel: self.create_flatten_fields,
|
484
492
|
IterableDecoderModel: self.create_iterable_decoder,
|
485
493
|
XmlDecoderModel: self.create_xml_decoder,
|
486
494
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
@@ -514,7 +522,6 @@ class ModelToComponentFactory:
|
|
514
522
|
ConfigComponentsResolverModel: self.create_config_components_resolver,
|
515
523
|
StreamConfigModel: self.create_stream_config,
|
516
524
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
517
|
-
ZipfileDecoderModel: self.create_zipfile_decoder,
|
518
525
|
}
|
519
526
|
|
520
527
|
# Needed for the case where we need to perform a second parse on the fields of a custom component
|
@@ -597,6 +604,16 @@ class ModelToComponentFactory:
|
|
597
604
|
) -> KeysToLowerTransformation:
|
598
605
|
return KeysToLowerTransformation()
|
599
606
|
|
607
|
+
def create_keys_to_snake_transformation(
|
608
|
+
self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
|
609
|
+
) -> KeysToSnakeCaseTransformation:
|
610
|
+
return KeysToSnakeCaseTransformation()
|
611
|
+
|
612
|
+
def create_flatten_fields(
|
613
|
+
self, model: FlattenFieldsModel, config: Config, **kwargs: Any
|
614
|
+
) -> FlattenFields:
|
615
|
+
return FlattenFields()
|
616
|
+
|
600
617
|
@staticmethod
|
601
618
|
def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
|
602
619
|
if not value_type:
|
@@ -1648,6 +1665,13 @@ class ModelToComponentFactory:
|
|
1648
1665
|
model.retriever, stream_slicer
|
1649
1666
|
)
|
1650
1667
|
|
1668
|
+
schema_transformations = []
|
1669
|
+
if model.schema_transformations:
|
1670
|
+
for transformation_model in model.schema_transformations:
|
1671
|
+
schema_transformations.append(
|
1672
|
+
self._create_component_from_model(model=transformation_model, config=config)
|
1673
|
+
)
|
1674
|
+
|
1651
1675
|
retriever = self._create_component_from_model(
|
1652
1676
|
model=model.retriever,
|
1653
1677
|
config=config,
|
@@ -1662,6 +1686,7 @@ class ModelToComponentFactory:
|
|
1662
1686
|
return DynamicSchemaLoader(
|
1663
1687
|
retriever=retriever,
|
1664
1688
|
config=config,
|
1689
|
+
schema_transformations=schema_transformations,
|
1665
1690
|
schema_type_identifier=schema_type_identifier,
|
1666
1691
|
parameters=model.parameters or {},
|
1667
1692
|
)
|
@@ -1692,20 +1717,6 @@ class ModelToComponentFactory:
|
|
1692
1717
|
) -> GzipJsonDecoder:
|
1693
1718
|
return GzipJsonDecoder(parameters={}, encoding=model.encoding)
|
1694
1719
|
|
1695
|
-
def create_zipfile_decoder(
|
1696
|
-
self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
|
1697
|
-
) -> ZipfileDecoder:
|
1698
|
-
parser = (
|
1699
|
-
self._create_component_from_model(model=model.parser, config=config)
|
1700
|
-
if model.parser
|
1701
|
-
else None
|
1702
|
-
)
|
1703
|
-
return ZipfileDecoder(parameters={}, parser=parser)
|
1704
|
-
|
1705
|
-
@staticmethod
|
1706
|
-
def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
|
1707
|
-
return JsonParser(parameters={})
|
1708
|
-
|
1709
1720
|
@staticmethod
|
1710
1721
|
def create_json_file_schema_loader(
|
1711
1722
|
model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
|
@@ -2252,22 +2263,28 @@ class ModelToComponentFactory:
|
|
2252
2263
|
urls_extractor=urls_extractor,
|
2253
2264
|
)
|
2254
2265
|
|
2255
|
-
|
2266
|
+
async_job_partition_router = AsyncJobPartitionRouter(
|
2256
2267
|
job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
|
2257
2268
|
job_repository,
|
2258
2269
|
stream_slices,
|
2259
|
-
JobTracker(
|
2260
|
-
|
2261
|
-
), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
2270
|
+
JobTracker(1),
|
2271
|
+
# FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
2262
2272
|
self._message_repository,
|
2263
|
-
has_bulk_parent=False,
|
2273
|
+
has_bulk_parent=False,
|
2274
|
+
# FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
|
2264
2275
|
),
|
2265
|
-
record_selector=record_selector,
|
2266
2276
|
stream_slicer=stream_slicer,
|
2267
2277
|
config=config,
|
2268
2278
|
parameters=model.parameters or {},
|
2269
2279
|
)
|
2270
2280
|
|
2281
|
+
return AsyncRetriever(
|
2282
|
+
record_selector=record_selector,
|
2283
|
+
stream_slicer=async_job_partition_router,
|
2284
|
+
config=config,
|
2285
|
+
parameters=model.parameters or {},
|
2286
|
+
)
|
2287
|
+
|
2271
2288
|
@staticmethod
|
2272
2289
|
def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
|
2273
2290
|
return Spec(
|
@@ -2,10 +2,18 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import AsyncJobPartitionRouter
|
5
6
|
from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import CartesianProductStreamSlicer
|
6
7
|
from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
|
7
8
|
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
|
8
9
|
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
|
9
10
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
10
11
|
|
11
|
-
__all__ = [
|
12
|
+
__all__ = [
|
13
|
+
"AsyncJobPartitionRouter",
|
14
|
+
"CartesianProductStreamSlicer",
|
15
|
+
"ListPartitionRouter",
|
16
|
+
"SinglePartitionRouter",
|
17
|
+
"SubstreamPartitionRouter",
|
18
|
+
"PartitionRouter"
|
19
|
+
]
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass, field
|
4
|
+
from typing import Any, Callable, Iterable, Mapping, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
8
|
+
AsyncJobOrchestrator,
|
9
|
+
AsyncPartition,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
12
|
+
SinglePartitionRouter,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
15
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
16
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
17
|
+
|
18
|
+
|
19
|
+
@dataclass
|
20
|
+
class AsyncJobPartitionRouter(StreamSlicer):
|
21
|
+
"""
|
22
|
+
Partition router that creates async jobs in a source API, periodically polls for job
|
23
|
+
completion, and supplies the completed job URL locations as stream slices so that
|
24
|
+
records can be extracted.
|
25
|
+
"""
|
26
|
+
|
27
|
+
config: Config
|
28
|
+
parameters: InitVar[Mapping[str, Any]]
|
29
|
+
job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
|
30
|
+
stream_slicer: StreamSlicer = field(
|
31
|
+
default_factory=lambda: SinglePartitionRouter(parameters={})
|
32
|
+
)
|
33
|
+
|
34
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
35
|
+
self._job_orchestrator_factory = self.job_orchestrator_factory
|
36
|
+
self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
|
37
|
+
self._parameters = parameters
|
38
|
+
|
39
|
+
def stream_slices(self) -> Iterable[StreamSlice]:
|
40
|
+
slices = self.stream_slicer.stream_slices()
|
41
|
+
self._job_orchestrator = self._job_orchestrator_factory(slices)
|
42
|
+
|
43
|
+
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
44
|
+
yield StreamSlice(
|
45
|
+
partition=dict(completed_partition.stream_slice.partition)
|
46
|
+
| {"partition": completed_partition},
|
47
|
+
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
48
|
+
)
|
49
|
+
|
50
|
+
def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
|
51
|
+
"""
|
52
|
+
This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
|
53
|
+
be responsible for. However, this was added in because the JobOrchestrator is required to
|
54
|
+
retrieve records. And without defining fetch_records() on this class, we're stuck with either
|
55
|
+
passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
|
56
|
+
"""
|
57
|
+
|
58
|
+
if not self._job_orchestrator:
|
59
|
+
raise AirbyteTracedException(
|
60
|
+
message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
|
61
|
+
internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
|
62
|
+
failure_type=FailureType.system_error,
|
63
|
+
)
|
64
|
+
|
65
|
+
return self._job_orchestrator.fetch_records(partition=partition)
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
2
|
|
3
3
|
|
4
|
-
from dataclasses import InitVar, dataclass
|
5
|
-
from typing import Any,
|
4
|
+
from dataclasses import InitVar, dataclass
|
5
|
+
from typing import Any, Iterable, Mapping, Optional
|
6
6
|
|
7
7
|
from typing_extensions import deprecated
|
8
8
|
|
@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
|
12
12
|
AsyncPartition,
|
13
13
|
)
|
14
14
|
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
|
15
|
-
from airbyte_cdk.sources.declarative.partition_routers import
|
15
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
16
|
+
AsyncJobPartitionRouter,
|
17
|
+
)
|
16
18
|
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
17
|
-
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
18
19
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
19
20
|
from airbyte_cdk.sources.streams.core import StreamData
|
20
21
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
29
30
|
class AsyncRetriever(Retriever):
|
30
31
|
config: Config
|
31
32
|
parameters: InitVar[Mapping[str, Any]]
|
32
|
-
job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
|
33
33
|
record_selector: RecordSelector
|
34
|
-
stream_slicer:
|
35
|
-
default_factory=lambda: SinglePartitionRouter(parameters={})
|
36
|
-
)
|
34
|
+
stream_slicer: AsyncJobPartitionRouter
|
37
35
|
|
38
36
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
39
|
-
self._job_orchestrator_factory = self.job_orchestrator_factory
|
40
|
-
self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
|
41
37
|
self._parameters = parameters
|
42
38
|
|
43
39
|
@property
|
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
|
|
54
50
|
"""
|
55
51
|
pass
|
56
52
|
|
57
|
-
@property
|
58
|
-
def _job_orchestrator(self) -> AsyncJobOrchestrator:
|
59
|
-
if not self.__job_orchestrator:
|
60
|
-
raise AirbyteTracedException(
|
61
|
-
message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
|
62
|
-
internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
|
63
|
-
failure_type=FailureType.system_error,
|
64
|
-
)
|
65
|
-
|
66
|
-
return self.__job_orchestrator
|
67
|
-
|
68
53
|
def _get_stream_state(self) -> StreamState:
|
69
54
|
"""
|
70
55
|
Gets the current state of the stream.
|
@@ -99,15 +84,7 @@ class AsyncRetriever(Retriever):
|
|
99
84
|
return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
|
100
85
|
|
101
86
|
def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
|
102
|
-
|
103
|
-
self.__job_orchestrator = self._job_orchestrator_factory(slices)
|
104
|
-
|
105
|
-
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
106
|
-
yield StreamSlice(
|
107
|
-
partition=dict(completed_partition.stream_slice.partition)
|
108
|
-
| {"partition": completed_partition},
|
109
|
-
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
110
|
-
)
|
87
|
+
return self.stream_slicer.stream_slices()
|
111
88
|
|
112
89
|
def read_records(
|
113
90
|
self,
|
@@ -116,7 +93,7 @@ class AsyncRetriever(Retriever):
|
|
116
93
|
) -> Iterable[StreamData]:
|
117
94
|
stream_state: StreamState = self._get_stream_state()
|
118
95
|
partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
|
119
|
-
records: Iterable[Mapping[str, Any]] = self.
|
96
|
+
records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
|
120
97
|
|
121
98
|
yield from self.record_selector.filter_and_transform(
|
122
99
|
all_data=records,
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
|
6
6
|
from copy import deepcopy
|
7
|
-
from dataclasses import InitVar, dataclass
|
7
|
+
from dataclasses import InitVar, dataclass, field
|
8
8
|
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
10
|
import dpath
|
@@ -13,8 +13,9 @@ from typing_extensions import deprecated
|
|
13
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
14
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
15
15
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
16
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
16
17
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
17
|
-
from airbyte_cdk.sources.types import Config
|
18
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
18
19
|
|
19
20
|
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
|
20
21
|
"string": {"type": ["null", "string"]},
|
@@ -103,6 +104,7 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
103
104
|
config: Config
|
104
105
|
parameters: InitVar[Mapping[str, Any]]
|
105
106
|
schema_type_identifier: SchemaTypeIdentifier
|
107
|
+
schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
106
108
|
|
107
109
|
def get_json_schema(self) -> Mapping[str, Any]:
|
108
110
|
"""
|
@@ -128,12 +130,27 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
128
130
|
)
|
129
131
|
properties[key] = value
|
130
132
|
|
133
|
+
transformed_properties = self._transform(properties, {})
|
134
|
+
|
131
135
|
return {
|
132
136
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
133
137
|
"type": "object",
|
134
|
-
"properties":
|
138
|
+
"properties": transformed_properties,
|
135
139
|
}
|
136
140
|
|
141
|
+
def _transform(
|
142
|
+
self,
|
143
|
+
properties: Mapping[str, Any],
|
144
|
+
stream_state: StreamState,
|
145
|
+
stream_slice: Optional[StreamSlice] = None,
|
146
|
+
) -> Mapping[str, Any]:
|
147
|
+
for transformation in self.schema_transformations:
|
148
|
+
transformation.transform(
|
149
|
+
properties, # type: ignore # properties has type Mapping[str, Any], but Dict[str, Any] expected
|
150
|
+
config=self.config,
|
151
|
+
)
|
152
|
+
return properties
|
153
|
+
|
137
154
|
def _get_key(
|
138
155
|
self,
|
139
156
|
raw_schema: MutableMapping[str, Any],
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import Any, Dict, Optional
|
7
|
+
|
8
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
9
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
|
13
|
+
class FlattenFields(RecordTransformation):
|
14
|
+
def transform(
|
15
|
+
self,
|
16
|
+
record: Dict[str, Any],
|
17
|
+
config: Optional[Config] = None,
|
18
|
+
stream_state: Optional[StreamState] = None,
|
19
|
+
stream_slice: Optional[StreamSlice] = None,
|
20
|
+
) -> None:
|
21
|
+
transformed_record = self.flatten_record(record)
|
22
|
+
record.clear()
|
23
|
+
record.update(transformed_record)
|
24
|
+
|
25
|
+
def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
|
26
|
+
stack = [(record, "_")]
|
27
|
+
transformed_record: Dict[str, Any] = {}
|
28
|
+
force_with_parent_name = False
|
29
|
+
|
30
|
+
while stack:
|
31
|
+
current_record, parent_key = stack.pop()
|
32
|
+
|
33
|
+
if isinstance(current_record, dict):
|
34
|
+
for current_key, value in current_record.items():
|
35
|
+
new_key = (
|
36
|
+
f"{parent_key}.{current_key}"
|
37
|
+
if (current_key in transformed_record or force_with_parent_name)
|
38
|
+
else current_key
|
39
|
+
)
|
40
|
+
stack.append((value, new_key))
|
41
|
+
|
42
|
+
elif isinstance(current_record, list):
|
43
|
+
for i, item in enumerate(current_record):
|
44
|
+
force_with_parent_name = True
|
45
|
+
stack.append((item, f"{parent_key}.{i}"))
|
46
|
+
|
47
|
+
else:
|
48
|
+
transformed_record[parent_key] = current_record
|
49
|
+
|
50
|
+
return transformed_record
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import re
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import Any, Dict, List, Optional
|
8
|
+
|
9
|
+
import unidecode
|
10
|
+
|
11
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
12
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass
|
16
|
+
class KeysToSnakeCaseTransformation(RecordTransformation):
|
17
|
+
token_pattern: re.Pattern[str] = re.compile(
|
18
|
+
r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
|
19
|
+
)
|
20
|
+
|
21
|
+
def transform(
|
22
|
+
self,
|
23
|
+
record: Dict[str, Any],
|
24
|
+
config: Optional[Config] = None,
|
25
|
+
stream_state: Optional[StreamState] = None,
|
26
|
+
stream_slice: Optional[StreamSlice] = None,
|
27
|
+
) -> None:
|
28
|
+
transformed_record = self._transform_record(record)
|
29
|
+
record.clear()
|
30
|
+
record.update(transformed_record)
|
31
|
+
|
32
|
+
def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
|
33
|
+
transformed_record = {}
|
34
|
+
for key, value in record.items():
|
35
|
+
transformed_key = self.process_key(key)
|
36
|
+
transformed_value = value
|
37
|
+
|
38
|
+
if isinstance(value, dict):
|
39
|
+
transformed_value = self._transform_record(value)
|
40
|
+
|
41
|
+
transformed_record[transformed_key] = transformed_value
|
42
|
+
return transformed_record
|
43
|
+
|
44
|
+
def process_key(self, key: str) -> str:
|
45
|
+
key = self.normalize_key(key)
|
46
|
+
tokens = self.tokenize_key(key)
|
47
|
+
tokens = self.filter_tokens(tokens)
|
48
|
+
return self.tokens_to_snake_case(tokens)
|
49
|
+
|
50
|
+
def normalize_key(self, key: str) -> str:
|
51
|
+
return unidecode.unidecode(key)
|
52
|
+
|
53
|
+
def tokenize_key(self, key: str) -> List[str]:
|
54
|
+
tokens = []
|
55
|
+
for match in self.token_pattern.finditer(key):
|
56
|
+
token = match.group(0) if match.group("NoToken") is None else ""
|
57
|
+
tokens.append(token)
|
58
|
+
return tokens
|
59
|
+
|
60
|
+
def filter_tokens(self, tokens: List[str]) -> List[str]:
|
61
|
+
if len(tokens) >= 3:
|
62
|
+
tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
|
63
|
+
if tokens and tokens[0].isdigit():
|
64
|
+
tokens.insert(0, "")
|
65
|
+
return tokens
|
66
|
+
|
67
|
+
def tokens_to_snake_case(self, tokens: List[str]) -> str:
|
68
|
+
return "_".join(token.lower() for token in tokens)
|
@@ -262,7 +262,7 @@ class HttpClient:
|
|
262
262
|
user_backoff_handler = user_defined_backoff_handler(max_tries=max_tries, max_time=max_time)(
|
263
263
|
self._send
|
264
264
|
)
|
265
|
-
rate_limit_backoff_handler = rate_limit_default_backoff_handler()
|
265
|
+
rate_limit_backoff_handler = rate_limit_default_backoff_handler(max_tries=max_tries)
|
266
266
|
backoff_handler = http_client_default_backoff_handler(
|
267
267
|
max_tries=max_tries, max_time=max_time
|
268
268
|
)
|
@@ -472,7 +472,9 @@ class HttpClient:
|
|
472
472
|
|
473
473
|
elif retry_endlessly:
|
474
474
|
raise RateLimitBackoffException(
|
475
|
-
request=request,
|
475
|
+
request=request,
|
476
|
+
response=(response if response is not None else exc),
|
477
|
+
error_message=error_message,
|
476
478
|
)
|
477
479
|
|
478
480
|
raise DefaultBackoffException(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.12.
|
3
|
+
Version: 6.12.1
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -22,6 +22,7 @@ Provides-Extra: sql
|
|
22
22
|
Provides-Extra: vector-db-based
|
23
23
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
24
24
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
25
|
+
Requires-Dist: Unidecode (>=1.3,<2.0)
|
25
26
|
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
26
27
|
Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
|
27
28
|
Requires-Dist: backoff
|
@@ -66,18 +66,15 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p268
|
|
66
66
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
67
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
68
68
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
69
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
69
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Ls6VUtfX2GvJHR-hv5rs9azjAnunmr8JqZh8vi0DmU4,129264
|
70
70
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
71
71
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
72
|
-
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=
|
72
|
+
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
|
73
73
|
airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=sl-Gt8lXi7yD2Q-sD8je5QS2PbgrgsYjxRLWsay7DMc,826
|
74
74
|
airbyte_cdk/sources/declarative/decoders/json_decoder.py,sha256=qdbjeR6RffKaah_iWvMsOcDolYuxJY5DaI3b9AMTZXg,3327
|
75
75
|
airbyte_cdk/sources/declarative/decoders/noop_decoder.py,sha256=iZh0yKY_JzgBnJWiubEusf5c0o6Khd-8EWFWT-8EgFo,542
|
76
76
|
airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py,sha256=ZVBZhAOl0I0MymXN5CKTC-kIXG4GuUQAEyn0XpUDuSE,1081
|
77
|
-
airbyte_cdk/sources/declarative/decoders/parsers/__init__.py,sha256=3qvAJF3O2Ie0qK5ANgsEKR136t7HKk53rR_FyO8ihuY,186
|
78
|
-
airbyte_cdk/sources/declarative/decoders/parsers/parsers.py,sha256=Xlz4xmRYAtMFqdvAMwEdTxh-90glKlijMEm6qLOyuAs,1257
|
79
77
|
airbyte_cdk/sources/declarative/decoders/xml_decoder.py,sha256=EU-7t-5vIGRHZ14h-f0GUE4V5-eTM9Flux-A8xgI1Rc,3117
|
80
|
-
airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py,sha256=IPLCgYpab6og6j7BdvBuhn_7c1ZYyg8yu48DPkvA84c,1752
|
81
78
|
airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
|
82
79
|
airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=YFuL4D4RuuB8E1DNSbJNIj0_HApOlyECoJ_s8DuJMeI,611
|
83
80
|
airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
|
@@ -107,13 +104,14 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
|
|
107
104
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
108
105
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
109
106
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
110
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
107
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=kESlZ8F2i2q3BMZfEhwEk0XEzxr7SWtJti6AoPsthg0,90831
|
111
108
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
112
109
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
113
110
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
114
111
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
115
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
116
|
-
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=i2Z4q_9pYWc40uiHJ3UMqAh1hgoVTTXybxRaWzbwNHE,106031
|
113
|
+
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=974SY1RFwitUCiiDHuFHDGmSNu1D72z3bSTpvlBwAho,911
|
114
|
+
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
|
117
115
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
118
116
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
|
119
117
|
airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
|
@@ -160,12 +158,12 @@ airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb
|
|
160
158
|
airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=dz4iJV9liD_LzY_Mn4XmAStoUll60R3MIGWV4aN3pgg,5223
|
161
159
|
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=ZA2vrHQKfXNMcH3x1iuyFOTGNzYDhUFT2qcaiOzSK0A,4271
|
162
160
|
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp07VmLKX5AafUlsZWFSrDpUDuJM,443
|
163
|
-
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=
|
161
|
+
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=3jgor7a6_s_9KgqHmPk6cWMDZ-6OugFPjCajIkC3Onw,3721
|
164
162
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
165
163
|
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
|
166
164
|
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
|
167
165
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
|
168
|
-
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=
|
166
|
+
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
|
169
167
|
airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
|
170
168
|
airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
|
171
169
|
airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
|
@@ -176,7 +174,9 @@ airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.p
|
|
176
174
|
airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
|
177
175
|
airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
|
178
176
|
airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=r4YdAuAk2bQtNWJMztIIy2CC-NglD9NeK1s1TeO9wkw,5027
|
177
|
+
airbyte_cdk/sources/declarative/transformations/flatten_fields.py,sha256=ti9fLVk-EpMeDY7ImduvQq1YGounLYmH9dHzp7MIRxk,1703
|
179
178
|
airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py,sha256=RTs5KX4V3hM7A6QN1WlGF21YccTIyNH6qQI9IMb__hw,670
|
179
|
+
airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=43zwe6_F5ba5C4eY0RgXxPz7ndPKZfXGChHepFn-2lk,2263
|
180
180
|
airbyte_cdk/sources/declarative/transformations/remove_fields.py,sha256=EwUP0SZ2p4GRJ6Q8CUzlz9dcUeEidEFDlI2IBye2tlc,2745
|
181
181
|
airbyte_cdk/sources/declarative/transformations/transformation.py,sha256=4sXtx9cNY2EHUPq-xHvDs8GQEBUy3Eo6TkRLKHPXx68,1161
|
182
182
|
airbyte_cdk/sources/declarative/types.py,sha256=yqx0xlZv_76tkC7fqJKefmvl4GJJ8mXbeddwVV8XRJU,778
|
@@ -281,7 +281,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
|
|
281
281
|
airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
|
282
282
|
airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
|
283
283
|
airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9wdYlhFHv_1Q4,28496
|
284
|
-
airbyte_cdk/sources/streams/http/http_client.py,sha256=
|
284
|
+
airbyte_cdk/sources/streams/http/http_client.py,sha256=tDE0ROtxjGMVphvsw8INvGMtZ97hIF-v47pZ3jIyiwc,23011
|
285
285
|
airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
|
286
286
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
287
287
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
|
@@ -339,8 +339,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
339
339
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
340
340
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
341
341
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
342
|
-
airbyte_cdk-6.12.
|
343
|
-
airbyte_cdk-6.12.
|
344
|
-
airbyte_cdk-6.12.
|
345
|
-
airbyte_cdk-6.12.
|
346
|
-
airbyte_cdk-6.12.
|
342
|
+
airbyte_cdk-6.12.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
343
|
+
airbyte_cdk-6.12.1.dist-info/METADATA,sha256=w7op06XAtTqV47wgvbGddUc7pHN6TQB2YvBh8vjQ7FM,5988
|
344
|
+
airbyte_cdk-6.12.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
345
|
+
airbyte_cdk-6.12.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
346
|
+
airbyte_cdk-6.12.1.dist-info/RECORD,,
|
@@ -1,49 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import json
|
6
|
-
import logging
|
7
|
-
from abc import abstractmethod
|
8
|
-
from dataclasses import InitVar, dataclass
|
9
|
-
from typing import Any, Generator, Mapping, MutableMapping, Union
|
10
|
-
|
11
|
-
logger = logging.getLogger("airbyte")
|
12
|
-
|
13
|
-
|
14
|
-
@dataclass
|
15
|
-
class Parser:
|
16
|
-
"""
|
17
|
-
Parser strategy to convert str, bytes, or bytearray data into MutableMapping[str, Any].
|
18
|
-
"""
|
19
|
-
|
20
|
-
@abstractmethod
|
21
|
-
def parse(
|
22
|
-
self, data: Union[str, bytes, bytearray]
|
23
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
24
|
-
pass
|
25
|
-
|
26
|
-
|
27
|
-
@dataclass
|
28
|
-
class JsonParser(Parser):
|
29
|
-
"""
|
30
|
-
Parser strategy for converting JSON-structure str, bytes, or bytearray data into MutableMapping[str, Any].
|
31
|
-
"""
|
32
|
-
|
33
|
-
parameters: InitVar[Mapping[str, Any]]
|
34
|
-
|
35
|
-
def parse(
|
36
|
-
self, data: Union[str, bytes, bytearray]
|
37
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
38
|
-
try:
|
39
|
-
body_json = json.loads(data)
|
40
|
-
except json.JSONDecodeError:
|
41
|
-
logger.warning(f"Data cannot be parsed into json: {data=}")
|
42
|
-
yield {}
|
43
|
-
|
44
|
-
if not isinstance(body_json, list):
|
45
|
-
body_json = [body_json]
|
46
|
-
if len(body_json) == 0:
|
47
|
-
yield {}
|
48
|
-
else:
|
49
|
-
yield from body_json
|
@@ -1,54 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import gzip
|
6
|
-
import io
|
7
|
-
import logging
|
8
|
-
import zipfile
|
9
|
-
from dataclasses import InitVar, dataclass
|
10
|
-
from typing import Any, Generator, Mapping, MutableMapping, Optional
|
11
|
-
|
12
|
-
import requests
|
13
|
-
|
14
|
-
from airbyte_cdk.sources.declarative.decoders import Decoder
|
15
|
-
from airbyte_cdk.sources.declarative.decoders.parsers import JsonParser, Parser
|
16
|
-
|
17
|
-
logger = logging.getLogger("airbyte")
|
18
|
-
|
19
|
-
|
20
|
-
@dataclass
|
21
|
-
class ZipfileDecoder(Decoder):
|
22
|
-
parameters: InitVar[Mapping[str, Any]]
|
23
|
-
parser: Optional[Parser] = None
|
24
|
-
|
25
|
-
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
26
|
-
self._parser = (
|
27
|
-
self.parser(parameters=parameters) if self.parser else JsonParser(parameters=parameters)
|
28
|
-
)
|
29
|
-
|
30
|
-
def is_stream_response(self) -> bool:
|
31
|
-
return False
|
32
|
-
|
33
|
-
def decode(
|
34
|
-
self, response: requests.Response
|
35
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
36
|
-
try:
|
37
|
-
zip_file = zipfile.ZipFile(io.BytesIO(response.content))
|
38
|
-
except zipfile.BadZipFile as e:
|
39
|
-
logger.exception(e)
|
40
|
-
logger.error(
|
41
|
-
f"Received an invalid zip file in response to URL: {response.request.url}. "
|
42
|
-
f"The size of the response body is: {len(response.content)}"
|
43
|
-
)
|
44
|
-
yield {}
|
45
|
-
|
46
|
-
for gzip_filename in zip_file.namelist():
|
47
|
-
with zip_file.open(gzip_filename) as file:
|
48
|
-
try:
|
49
|
-
for data in gzip.open(file):
|
50
|
-
yield from self._parser.parse(data)
|
51
|
-
except gzip.BadGzipFile as e:
|
52
|
-
logger.exception(e)
|
53
|
-
logger.error(f"Fail to read contents of zipped response: {e}")
|
54
|
-
yield {}
|
File without changes
|
File without changes
|
File without changes
|