airbyte-cdk 6.12.1.dev0__py3-none-any.whl → 6.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +77 -35
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +65 -44
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +48 -7
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +9 -1
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +8 -31
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +50 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http_client.py +4 -2
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.2.dist-info}/METADATA +3 -2
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.2.dist-info}/RECORD +17 -14
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.2.dist-info}/entry_points.txt +0 -0
@@ -1234,6 +1234,8 @@ definitions:
|
|
1234
1234
|
- "$ref": "#/definitions/CustomTransformation"
|
1235
1235
|
- "$ref": "#/definitions/RemoveFields"
|
1236
1236
|
- "$ref": "#/definitions/KeysToLower"
|
1237
|
+
- "$ref": "#/definitions/KeysToSnakeCase"
|
1238
|
+
- "$ref": "#/definitions/FlattenFields"
|
1237
1239
|
state_migrations:
|
1238
1240
|
title: State Migrations
|
1239
1241
|
description: Array of state migrations to be applied on the input state
|
@@ -1766,6 +1768,18 @@ definitions:
|
|
1766
1768
|
- "$ref": "#/definitions/AsyncRetriever"
|
1767
1769
|
- "$ref": "#/definitions/CustomRetriever"
|
1768
1770
|
- "$ref": "#/definitions/SimpleRetriever"
|
1771
|
+
schema_transformations:
|
1772
|
+
title: Schema Transformations
|
1773
|
+
description: A list of transformations to be applied to the schema.
|
1774
|
+
type: array
|
1775
|
+
items:
|
1776
|
+
anyOf:
|
1777
|
+
- "$ref": "#/definitions/AddFields"
|
1778
|
+
- "$ref": "#/definitions/CustomTransformation"
|
1779
|
+
- "$ref": "#/definitions/RemoveFields"
|
1780
|
+
- "$ref": "#/definitions/KeysToLower"
|
1781
|
+
- "$ref": "#/definitions/KeysToSnakeCase"
|
1782
|
+
- "$ref": "#/definitions/FlattenFields"
|
1769
1783
|
schema_type_identifier:
|
1770
1784
|
"$ref": "#/definitions/SchemaTypeIdentifier"
|
1771
1785
|
$parameters:
|
@@ -1838,6 +1852,32 @@ definitions:
|
|
1838
1852
|
$parameters:
|
1839
1853
|
type: object
|
1840
1854
|
additionalProperties: true
|
1855
|
+
KeysToSnakeCase:
|
1856
|
+
title: Key to Snake Case
|
1857
|
+
description: A transformation that renames all keys to snake case.
|
1858
|
+
type: object
|
1859
|
+
required:
|
1860
|
+
- type
|
1861
|
+
properties:
|
1862
|
+
type:
|
1863
|
+
type: string
|
1864
|
+
enum: [KeysToSnakeCase]
|
1865
|
+
$parameters:
|
1866
|
+
type: object
|
1867
|
+
additionalProperties: true
|
1868
|
+
FlattenFields:
|
1869
|
+
title: Flatten Fields
|
1870
|
+
description: A transformation that flatten record to single level format.
|
1871
|
+
type: object
|
1872
|
+
required:
|
1873
|
+
- type
|
1874
|
+
properties:
|
1875
|
+
type:
|
1876
|
+
type: string
|
1877
|
+
enum: [FlattenFields]
|
1878
|
+
$parameters:
|
1879
|
+
type: object
|
1880
|
+
additionalProperties: true
|
1841
1881
|
IterableDecoder:
|
1842
1882
|
title: Iterable Decoder
|
1843
1883
|
description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
|
@@ -2130,65 +2170,63 @@ definitions:
|
|
2130
2170
|
- extract_output
|
2131
2171
|
properties:
|
2132
2172
|
consent_url:
|
2133
|
-
title:
|
2173
|
+
title: Consent URL
|
2134
2174
|
type: string
|
2135
2175
|
description: |-
|
2136
2176
|
The DeclarativeOAuth Specific string URL string template to initiate the authentication.
|
2137
2177
|
The placeholders are replaced during the processing to provide neccessary values.
|
2138
2178
|
examples:
|
2139
|
-
-
|
2140
|
-
-
|
2179
|
+
- https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}
|
2180
|
+
- https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}
|
2141
2181
|
scope:
|
2142
|
-
title:
|
2182
|
+
title: Scopes
|
2143
2183
|
type: string
|
2144
2184
|
description: |-
|
2145
2185
|
The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.
|
2146
2186
|
examples:
|
2147
|
-
-
|
2187
|
+
- user:read user:read_orders workspaces:read
|
2148
2188
|
access_token_url:
|
2149
|
-
title:
|
2189
|
+
title: Access Token URL
|
2150
2190
|
type: string
|
2151
2191
|
description: |-
|
2152
2192
|
The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.
|
2153
2193
|
The placeholders are replaced during the processing to provide neccessary values.
|
2154
2194
|
examples:
|
2155
|
-
-
|
2195
|
+
- https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
|
2156
2196
|
access_token_headers:
|
2157
|
-
title:
|
2197
|
+
title: Access Token Headers
|
2158
2198
|
type: object
|
2159
2199
|
additionalProperties: true
|
2160
2200
|
description: |-
|
2161
2201
|
The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
|
2162
2202
|
examples:
|
2163
|
-
-
|
2164
|
-
{
|
2165
|
-
|
2166
|
-
}
|
2203
|
+
- {
|
2204
|
+
"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}",
|
2205
|
+
}
|
2167
2206
|
access_token_params:
|
2168
|
-
title:
|
2207
|
+
title: Access Token Query Params (Json Encoded)
|
2169
2208
|
type: object
|
2170
2209
|
additionalProperties: true
|
2171
2210
|
description: |-
|
2172
2211
|
The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
|
2173
2212
|
When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.
|
2174
2213
|
examples:
|
2175
|
-
-
|
2176
|
-
{
|
2177
|
-
|
2178
|
-
|
2179
|
-
|
2180
|
-
}
|
2214
|
+
- {
|
2215
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
2216
|
+
"{client_id_key}": "{{client_id_key}}",
|
2217
|
+
"{client_secret_key}": "{{client_secret_key}}",
|
2218
|
+
}
|
2181
2219
|
extract_output:
|
2182
|
-
title:
|
2220
|
+
title: Extract Output
|
2183
2221
|
type: array
|
2184
2222
|
items:
|
2185
2223
|
type: string
|
2186
2224
|
description: |-
|
2187
2225
|
The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
|
2188
2226
|
examples:
|
2189
|
-
-
|
2227
|
+
- ["access_token", "refresh_token", "other_field"]
|
2190
2228
|
state:
|
2191
|
-
title:
|
2229
|
+
title: Configurable State Query Param
|
2192
2230
|
type: object
|
2193
2231
|
additionalProperties: true
|
2194
2232
|
required:
|
@@ -2203,49 +2241,49 @@ definitions:
|
|
2203
2241
|
max:
|
2204
2242
|
type: integer
|
2205
2243
|
examples:
|
2206
|
-
-
|
2244
|
+
- { "min": 7, "max": 128 }
|
2207
2245
|
client_id_key:
|
2208
|
-
title:
|
2246
|
+
title: Client ID Key Override
|
2209
2247
|
type: string
|
2210
2248
|
description: |-
|
2211
2249
|
The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.
|
2212
2250
|
examples:
|
2213
|
-
-
|
2251
|
+
- "my_custom_client_id_key_name"
|
2214
2252
|
client_secret_key:
|
2215
|
-
title:
|
2253
|
+
title: Client Secret Key Override
|
2216
2254
|
type: string
|
2217
2255
|
description: |-
|
2218
2256
|
The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.
|
2219
2257
|
examples:
|
2220
|
-
-
|
2258
|
+
- "my_custom_client_secret_key_name"
|
2221
2259
|
scope_key:
|
2222
|
-
title:
|
2260
|
+
title: Scopes Key Override
|
2223
2261
|
type: string
|
2224
2262
|
description: |-
|
2225
2263
|
The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.
|
2226
2264
|
examples:
|
2227
|
-
-
|
2265
|
+
- "my_custom_scope_key_key_name"
|
2228
2266
|
state_key:
|
2229
|
-
title:
|
2267
|
+
title: State Key Override
|
2230
2268
|
type: string
|
2231
2269
|
description: |-
|
2232
2270
|
The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
|
2233
2271
|
examples:
|
2234
|
-
-
|
2272
|
+
- "my_custom_state_key_key_name"
|
2235
2273
|
auth_code_key:
|
2236
|
-
title:
|
2274
|
+
title: Auth Code Key Override
|
2237
2275
|
type: string
|
2238
2276
|
description: |-
|
2239
2277
|
The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
|
2240
2278
|
examples:
|
2241
|
-
-
|
2279
|
+
- "my_custom_auth_code_key_name"
|
2242
2280
|
redirect_uri_key:
|
2243
|
-
title:
|
2281
|
+
title: Redirect URI Key Override
|
2244
2282
|
type: string
|
2245
2283
|
description: |-
|
2246
2284
|
The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.
|
2247
2285
|
examples:
|
2248
|
-
-
|
2286
|
+
- "my_custom_redirect_uri_key_name"
|
2249
2287
|
complete_oauth_output_specification:
|
2250
2288
|
title: "OAuth output specification"
|
2251
2289
|
description: |-
|
@@ -3011,6 +3049,7 @@ definitions:
|
|
3011
3049
|
interpolation_context:
|
3012
3050
|
- config
|
3013
3051
|
- components_values
|
3052
|
+
- stream_slice
|
3014
3053
|
- stream_template_config
|
3015
3054
|
examples:
|
3016
3055
|
- ["data"]
|
@@ -3027,10 +3066,13 @@ definitions:
|
|
3027
3066
|
- config
|
3028
3067
|
- stream_template_config
|
3029
3068
|
- components_values
|
3069
|
+
- stream_slice
|
3030
3070
|
examples:
|
3031
3071
|
- "{{ components_values['updates'] }}"
|
3032
3072
|
- "{{ components_values['MetaData']['LastUpdatedTime'] }}"
|
3033
3073
|
- "{{ config['segment_id'] }}"
|
3074
|
+
- "{{ stream_slice['parent_id'] }}"
|
3075
|
+
- "{{ stream_slice['extra_fields']['name'] }}"
|
3034
3076
|
value_type:
|
3035
3077
|
title: Value Type
|
3036
3078
|
description: The expected data type of the value. If omitted, the type will be inferred from the value provided.
|
@@ -710,6 +710,16 @@ class KeysToLower(BaseModel):
|
|
710
710
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
711
711
|
|
712
712
|
|
713
|
+
class KeysToSnakeCase(BaseModel):
|
714
|
+
type: Literal["KeysToSnakeCase"]
|
715
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
716
|
+
|
717
|
+
|
718
|
+
class FlattenFields(BaseModel):
|
719
|
+
type: Literal["FlattenFields"]
|
720
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
721
|
+
|
722
|
+
|
713
723
|
class IterableDecoder(BaseModel):
|
714
724
|
type: Literal["IterableDecoder"]
|
715
725
|
|
@@ -795,104 +805,90 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
795
805
|
...,
|
796
806
|
description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
|
797
807
|
examples=[
|
798
|
-
{
|
799
|
-
|
800
|
-
},
|
801
|
-
{
|
802
|
-
"consent_url": "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}"
|
803
|
-
},
|
808
|
+
"https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
|
809
|
+
"https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}",
|
804
810
|
],
|
805
|
-
title="
|
811
|
+
title="Consent URL",
|
806
812
|
)
|
807
813
|
scope: Optional[str] = Field(
|
808
814
|
None,
|
809
815
|
description="The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.",
|
810
|
-
examples=[
|
811
|
-
title="
|
816
|
+
examples=["user:read user:read_orders workspaces:read"],
|
817
|
+
title="Scopes",
|
812
818
|
)
|
813
819
|
access_token_url: str = Field(
|
814
820
|
...,
|
815
821
|
description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
|
816
822
|
examples=[
|
817
|
-
{
|
818
|
-
"access_token_url": "https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
|
819
|
-
}
|
823
|
+
"https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
|
820
824
|
],
|
821
|
-
title="
|
825
|
+
title="Access Token URL",
|
822
826
|
)
|
823
827
|
access_token_headers: Optional[Dict[str, Any]] = Field(
|
824
828
|
None,
|
825
829
|
description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
|
826
|
-
examples=[
|
827
|
-
|
828
|
-
"access_token_headers": {
|
829
|
-
"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"
|
830
|
-
}
|
831
|
-
}
|
832
|
-
],
|
833
|
-
title="(Optional) DeclarativeOAuth Access Token Headers",
|
830
|
+
examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}],
|
831
|
+
title="Access Token Headers",
|
834
832
|
)
|
835
833
|
access_token_params: Optional[Dict[str, Any]] = Field(
|
836
834
|
None,
|
837
835
|
description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
|
838
836
|
examples=[
|
839
837
|
{
|
840
|
-
"
|
841
|
-
|
842
|
-
|
843
|
-
"{client_secret_key}": "{{client_secret_key}}",
|
844
|
-
}
|
838
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
839
|
+
"{client_id_key}": "{{client_id_key}}",
|
840
|
+
"{client_secret_key}": "{{client_secret_key}}",
|
845
841
|
}
|
846
842
|
],
|
847
|
-
title="
|
843
|
+
title="Access Token Query Params (Json Encoded)",
|
848
844
|
)
|
849
845
|
extract_output: List[str] = Field(
|
850
846
|
...,
|
851
847
|
description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
|
852
|
-
examples=[
|
853
|
-
title="
|
848
|
+
examples=[["access_token", "refresh_token", "other_field"]],
|
849
|
+
title="Extract Output",
|
854
850
|
)
|
855
851
|
state: Optional[State] = Field(
|
856
852
|
None,
|
857
853
|
description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.",
|
858
|
-
examples=[{"
|
859
|
-
title="
|
854
|
+
examples=[{"min": 7, "max": 128}],
|
855
|
+
title="Configurable State Query Param",
|
860
856
|
)
|
861
857
|
client_id_key: Optional[str] = Field(
|
862
858
|
None,
|
863
859
|
description="The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.",
|
864
|
-
examples=[
|
865
|
-
title="
|
860
|
+
examples=["my_custom_client_id_key_name"],
|
861
|
+
title="Client ID Key Override",
|
866
862
|
)
|
867
863
|
client_secret_key: Optional[str] = Field(
|
868
864
|
None,
|
869
865
|
description="The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.",
|
870
|
-
examples=[
|
871
|
-
title="
|
866
|
+
examples=["my_custom_client_secret_key_name"],
|
867
|
+
title="Client Secret Key Override",
|
872
868
|
)
|
873
869
|
scope_key: Optional[str] = Field(
|
874
870
|
None,
|
875
871
|
description="The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.",
|
876
|
-
examples=[
|
877
|
-
title="
|
872
|
+
examples=["my_custom_scope_key_key_name"],
|
873
|
+
title="Scopes Key Override",
|
878
874
|
)
|
879
875
|
state_key: Optional[str] = Field(
|
880
876
|
None,
|
881
877
|
description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.",
|
882
|
-
examples=[
|
883
|
-
title="
|
878
|
+
examples=["my_custom_state_key_key_name"],
|
879
|
+
title="State Key Override",
|
884
880
|
)
|
885
881
|
auth_code_key: Optional[str] = Field(
|
886
882
|
None,
|
887
883
|
description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.",
|
888
|
-
examples=[
|
889
|
-
title="
|
884
|
+
examples=["my_custom_auth_code_key_name"],
|
885
|
+
title="Auth Code Key Override",
|
890
886
|
)
|
891
887
|
redirect_uri_key: Optional[str] = Field(
|
892
888
|
None,
|
893
889
|
description="The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.",
|
894
|
-
examples=[
|
895
|
-
title="
|
890
|
+
examples=["my_custom_redirect_uri_key_name"],
|
891
|
+
title="Redirect URI Key Override",
|
896
892
|
)
|
897
893
|
|
898
894
|
|
@@ -1668,7 +1664,16 @@ class DeclarativeStream(BaseModel):
|
|
1668
1664
|
title="Schema Loader",
|
1669
1665
|
)
|
1670
1666
|
transformations: Optional[
|
1671
|
-
List[
|
1667
|
+
List[
|
1668
|
+
Union[
|
1669
|
+
AddFields,
|
1670
|
+
CustomTransformation,
|
1671
|
+
RemoveFields,
|
1672
|
+
KeysToLower,
|
1673
|
+
KeysToSnakeCase,
|
1674
|
+
FlattenFields,
|
1675
|
+
]
|
1676
|
+
]
|
1672
1677
|
] = Field(
|
1673
1678
|
None,
|
1674
1679
|
description="A list of transformations to be applied to each output record.",
|
@@ -1832,6 +1837,22 @@ class DynamicSchemaLoader(BaseModel):
|
|
1832
1837
|
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
1833
1838
|
title="Retriever",
|
1834
1839
|
)
|
1840
|
+
schema_transformations: Optional[
|
1841
|
+
List[
|
1842
|
+
Union[
|
1843
|
+
AddFields,
|
1844
|
+
CustomTransformation,
|
1845
|
+
RemoveFields,
|
1846
|
+
KeysToLower,
|
1847
|
+
KeysToSnakeCase,
|
1848
|
+
FlattenFields,
|
1849
|
+
]
|
1850
|
+
]
|
1851
|
+
] = Field(
|
1852
|
+
None,
|
1853
|
+
description="A list of transformations to be applied to the schema.",
|
1854
|
+
title="Schema Transformations",
|
1855
|
+
)
|
1835
1856
|
schema_type_identifier: SchemaTypeIdentifier
|
1836
1857
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1837
1858
|
|
@@ -197,6 +197,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
197
197
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
198
198
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
199
199
|
)
|
200
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
201
|
+
FlattenFields as FlattenFieldsModel,
|
202
|
+
)
|
200
203
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
201
204
|
GzipJsonDecoder as GzipJsonDecoderModel,
|
202
205
|
)
|
@@ -236,6 +239,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
236
239
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
237
240
|
KeysToLower as KeysToLowerModel,
|
238
241
|
)
|
242
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
243
|
+
KeysToSnakeCase as KeysToSnakeCaseModel,
|
244
|
+
)
|
239
245
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
240
246
|
LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
|
241
247
|
)
|
@@ -323,6 +329,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
|
|
323
329
|
SinglePartitionRouter,
|
324
330
|
SubstreamPartitionRouter,
|
325
331
|
)
|
332
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
333
|
+
AsyncJobPartitionRouter,
|
334
|
+
)
|
326
335
|
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
327
336
|
ParentStreamConfig,
|
328
337
|
)
|
@@ -387,9 +396,15 @@ from airbyte_cdk.sources.declarative.transformations import (
|
|
387
396
|
RemoveFields,
|
388
397
|
)
|
389
398
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
|
399
|
+
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
|
400
|
+
FlattenFields,
|
401
|
+
)
|
390
402
|
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
|
391
403
|
KeysToLowerTransformation,
|
392
404
|
)
|
405
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
|
406
|
+
KeysToSnakeCaseTransformation,
|
407
|
+
)
|
393
408
|
from airbyte_cdk.sources.message import (
|
394
409
|
InMemoryMessageRepository,
|
395
410
|
LogAppenderMessageRepositoryDecorator,
|
@@ -472,6 +487,8 @@ class ModelToComponentFactory:
|
|
472
487
|
JsonlDecoderModel: self.create_jsonl_decoder,
|
473
488
|
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
474
489
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
490
|
+
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
491
|
+
FlattenFieldsModel: self.create_flatten_fields,
|
475
492
|
IterableDecoderModel: self.create_iterable_decoder,
|
476
493
|
XmlDecoderModel: self.create_xml_decoder,
|
477
494
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
@@ -587,6 +604,16 @@ class ModelToComponentFactory:
|
|
587
604
|
) -> KeysToLowerTransformation:
|
588
605
|
return KeysToLowerTransformation()
|
589
606
|
|
607
|
+
def create_keys_to_snake_transformation(
|
608
|
+
self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
|
609
|
+
) -> KeysToSnakeCaseTransformation:
|
610
|
+
return KeysToSnakeCaseTransformation()
|
611
|
+
|
612
|
+
def create_flatten_fields(
|
613
|
+
self, model: FlattenFieldsModel, config: Config, **kwargs: Any
|
614
|
+
) -> FlattenFields:
|
615
|
+
return FlattenFields()
|
616
|
+
|
590
617
|
@staticmethod
|
591
618
|
def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
|
592
619
|
if not value_type:
|
@@ -1638,6 +1665,13 @@ class ModelToComponentFactory:
|
|
1638
1665
|
model.retriever, stream_slicer
|
1639
1666
|
)
|
1640
1667
|
|
1668
|
+
schema_transformations = []
|
1669
|
+
if model.schema_transformations:
|
1670
|
+
for transformation_model in model.schema_transformations:
|
1671
|
+
schema_transformations.append(
|
1672
|
+
self._create_component_from_model(model=transformation_model, config=config)
|
1673
|
+
)
|
1674
|
+
|
1641
1675
|
retriever = self._create_component_from_model(
|
1642
1676
|
model=model.retriever,
|
1643
1677
|
config=config,
|
@@ -1652,6 +1686,7 @@ class ModelToComponentFactory:
|
|
1652
1686
|
return DynamicSchemaLoader(
|
1653
1687
|
retriever=retriever,
|
1654
1688
|
config=config,
|
1689
|
+
schema_transformations=schema_transformations,
|
1655
1690
|
schema_type_identifier=schema_type_identifier,
|
1656
1691
|
parameters=model.parameters or {},
|
1657
1692
|
)
|
@@ -2228,22 +2263,28 @@ class ModelToComponentFactory:
|
|
2228
2263
|
urls_extractor=urls_extractor,
|
2229
2264
|
)
|
2230
2265
|
|
2231
|
-
|
2266
|
+
async_job_partition_router = AsyncJobPartitionRouter(
|
2232
2267
|
job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
|
2233
2268
|
job_repository,
|
2234
2269
|
stream_slices,
|
2235
|
-
JobTracker(
|
2236
|
-
|
2237
|
-
), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
2270
|
+
JobTracker(1),
|
2271
|
+
# FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
2238
2272
|
self._message_repository,
|
2239
|
-
has_bulk_parent=False,
|
2273
|
+
has_bulk_parent=False,
|
2274
|
+
# FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk
|
2240
2275
|
),
|
2241
|
-
record_selector=record_selector,
|
2242
2276
|
stream_slicer=stream_slicer,
|
2243
2277
|
config=config,
|
2244
2278
|
parameters=model.parameters or {},
|
2245
2279
|
)
|
2246
2280
|
|
2281
|
+
return AsyncRetriever(
|
2282
|
+
record_selector=record_selector,
|
2283
|
+
stream_slicer=async_job_partition_router,
|
2284
|
+
config=config,
|
2285
|
+
parameters=model.parameters or {},
|
2286
|
+
)
|
2287
|
+
|
2247
2288
|
@staticmethod
|
2248
2289
|
def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
|
2249
2290
|
return Spec(
|
@@ -2353,7 +2394,7 @@ class ModelToComponentFactory:
|
|
2353
2394
|
config=config,
|
2354
2395
|
name="",
|
2355
2396
|
primary_key=None,
|
2356
|
-
stream_slicer=combined_slicers,
|
2397
|
+
stream_slicer=stream_slicer if stream_slicer else combined_slicers,
|
2357
2398
|
transformations=[],
|
2358
2399
|
)
|
2359
2400
|
|
@@ -2,10 +2,18 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import AsyncJobPartitionRouter
|
5
6
|
from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import CartesianProductStreamSlicer
|
6
7
|
from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
|
7
8
|
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
|
8
9
|
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
|
9
10
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
10
11
|
|
11
|
-
__all__ = [
|
12
|
+
__all__ = [
|
13
|
+
"AsyncJobPartitionRouter",
|
14
|
+
"CartesianProductStreamSlicer",
|
15
|
+
"ListPartitionRouter",
|
16
|
+
"SinglePartitionRouter",
|
17
|
+
"SubstreamPartitionRouter",
|
18
|
+
"PartitionRouter"
|
19
|
+
]
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass, field
|
4
|
+
from typing import Any, Callable, Iterable, Mapping, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
8
|
+
AsyncJobOrchestrator,
|
9
|
+
AsyncPartition,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
12
|
+
SinglePartitionRouter,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
15
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
16
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
17
|
+
|
18
|
+
|
19
|
+
@dataclass
|
20
|
+
class AsyncJobPartitionRouter(StreamSlicer):
|
21
|
+
"""
|
22
|
+
Partition router that creates async jobs in a source API, periodically polls for job
|
23
|
+
completion, and supplies the completed job URL locations as stream slices so that
|
24
|
+
records can be extracted.
|
25
|
+
"""
|
26
|
+
|
27
|
+
config: Config
|
28
|
+
parameters: InitVar[Mapping[str, Any]]
|
29
|
+
job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
|
30
|
+
stream_slicer: StreamSlicer = field(
|
31
|
+
default_factory=lambda: SinglePartitionRouter(parameters={})
|
32
|
+
)
|
33
|
+
|
34
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
35
|
+
self._job_orchestrator_factory = self.job_orchestrator_factory
|
36
|
+
self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
|
37
|
+
self._parameters = parameters
|
38
|
+
|
39
|
+
def stream_slices(self) -> Iterable[StreamSlice]:
|
40
|
+
slices = self.stream_slicer.stream_slices()
|
41
|
+
self._job_orchestrator = self._job_orchestrator_factory(slices)
|
42
|
+
|
43
|
+
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
44
|
+
yield StreamSlice(
|
45
|
+
partition=dict(completed_partition.stream_slice.partition)
|
46
|
+
| {"partition": completed_partition},
|
47
|
+
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
48
|
+
)
|
49
|
+
|
50
|
+
def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
|
51
|
+
"""
|
52
|
+
This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
|
53
|
+
be responsible for. However, this was added in because the JobOrchestrator is required to
|
54
|
+
retrieve records. And without defining fetch_records() on this class, we're stuck with either
|
55
|
+
passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
|
56
|
+
"""
|
57
|
+
|
58
|
+
if not self._job_orchestrator:
|
59
|
+
raise AirbyteTracedException(
|
60
|
+
message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
|
61
|
+
internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
|
62
|
+
failure_type=FailureType.system_error,
|
63
|
+
)
|
64
|
+
|
65
|
+
return self._job_orchestrator.fetch_records(partition=partition)
|
@@ -88,19 +88,25 @@ class HttpComponentsResolver(ComponentsResolver):
|
|
88
88
|
"""
|
89
89
|
kwargs = {"stream_template_config": stream_template_config}
|
90
90
|
|
91
|
-
for
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
91
|
+
for stream_slice in self.retriever.stream_slices():
|
92
|
+
for components_values in self.retriever.read_records(
|
93
|
+
records_schema={}, stream_slice=stream_slice
|
94
|
+
):
|
95
|
+
updated_config = deepcopy(stream_template_config)
|
96
|
+
kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
|
97
|
+
kwargs["stream_slice"] = stream_slice # type: ignore[assignment] # stream_slice will always be of type Mapping[str, Any]
|
98
|
+
|
99
|
+
for resolved_component in self._resolved_components:
|
100
|
+
valid_types = (
|
101
|
+
(resolved_component.value_type,) if resolved_component.value_type else None
|
102
|
+
)
|
103
|
+
value = resolved_component.value.eval(
|
104
|
+
self.config, valid_types=valid_types, **kwargs
|
105
|
+
)
|
102
106
|
|
103
|
-
|
104
|
-
|
107
|
+
path = [
|
108
|
+
path.eval(self.config, **kwargs) for path in resolved_component.field_path
|
109
|
+
]
|
110
|
+
dpath.set(updated_config, path, value)
|
105
111
|
|
106
|
-
|
112
|
+
yield updated_config
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
2
|
|
3
3
|
|
4
|
-
from dataclasses import InitVar, dataclass
|
5
|
-
from typing import Any,
|
4
|
+
from dataclasses import InitVar, dataclass
|
5
|
+
from typing import Any, Iterable, Mapping, Optional
|
6
6
|
|
7
7
|
from typing_extensions import deprecated
|
8
8
|
|
@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
|
12
12
|
AsyncPartition,
|
13
13
|
)
|
14
14
|
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
|
15
|
-
from airbyte_cdk.sources.declarative.partition_routers import
|
15
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
16
|
+
AsyncJobPartitionRouter,
|
17
|
+
)
|
16
18
|
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
17
|
-
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
18
19
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
19
20
|
from airbyte_cdk.sources.streams.core import StreamData
|
20
21
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
29
30
|
class AsyncRetriever(Retriever):
|
30
31
|
config: Config
|
31
32
|
parameters: InitVar[Mapping[str, Any]]
|
32
|
-
job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
|
33
33
|
record_selector: RecordSelector
|
34
|
-
stream_slicer:
|
35
|
-
default_factory=lambda: SinglePartitionRouter(parameters={})
|
36
|
-
)
|
34
|
+
stream_slicer: AsyncJobPartitionRouter
|
37
35
|
|
38
36
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
39
|
-
self._job_orchestrator_factory = self.job_orchestrator_factory
|
40
|
-
self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
|
41
37
|
self._parameters = parameters
|
42
38
|
|
43
39
|
@property
|
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
|
|
54
50
|
"""
|
55
51
|
pass
|
56
52
|
|
57
|
-
@property
|
58
|
-
def _job_orchestrator(self) -> AsyncJobOrchestrator:
|
59
|
-
if not self.__job_orchestrator:
|
60
|
-
raise AirbyteTracedException(
|
61
|
-
message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
|
62
|
-
internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
|
63
|
-
failure_type=FailureType.system_error,
|
64
|
-
)
|
65
|
-
|
66
|
-
return self.__job_orchestrator
|
67
|
-
|
68
53
|
def _get_stream_state(self) -> StreamState:
|
69
54
|
"""
|
70
55
|
Gets the current state of the stream.
|
@@ -99,15 +84,7 @@ class AsyncRetriever(Retriever):
|
|
99
84
|
return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
|
100
85
|
|
101
86
|
def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
|
102
|
-
|
103
|
-
self.__job_orchestrator = self._job_orchestrator_factory(slices)
|
104
|
-
|
105
|
-
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
106
|
-
yield StreamSlice(
|
107
|
-
partition=dict(completed_partition.stream_slice.partition)
|
108
|
-
| {"partition": completed_partition},
|
109
|
-
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
110
|
-
)
|
87
|
+
return self.stream_slicer.stream_slices()
|
111
88
|
|
112
89
|
def read_records(
|
113
90
|
self,
|
@@ -116,7 +93,7 @@ class AsyncRetriever(Retriever):
|
|
116
93
|
) -> Iterable[StreamData]:
|
117
94
|
stream_state: StreamState = self._get_stream_state()
|
118
95
|
partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
|
119
|
-
records: Iterable[Mapping[str, Any]] = self.
|
96
|
+
records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
|
120
97
|
|
121
98
|
yield from self.record_selector.filter_and_transform(
|
122
99
|
all_data=records,
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
|
6
6
|
from copy import deepcopy
|
7
|
-
from dataclasses import InitVar, dataclass
|
7
|
+
from dataclasses import InitVar, dataclass, field
|
8
8
|
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
10
|
import dpath
|
@@ -13,8 +13,9 @@ from typing_extensions import deprecated
|
|
13
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
14
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
15
15
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
16
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
16
17
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
17
|
-
from airbyte_cdk.sources.types import Config
|
18
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
18
19
|
|
19
20
|
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
|
20
21
|
"string": {"type": ["null", "string"]},
|
@@ -103,6 +104,7 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
103
104
|
config: Config
|
104
105
|
parameters: InitVar[Mapping[str, Any]]
|
105
106
|
schema_type_identifier: SchemaTypeIdentifier
|
107
|
+
schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
106
108
|
|
107
109
|
def get_json_schema(self) -> Mapping[str, Any]:
|
108
110
|
"""
|
@@ -128,12 +130,27 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
128
130
|
)
|
129
131
|
properties[key] = value
|
130
132
|
|
133
|
+
transformed_properties = self._transform(properties, {})
|
134
|
+
|
131
135
|
return {
|
132
136
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
133
137
|
"type": "object",
|
134
|
-
"properties":
|
138
|
+
"properties": transformed_properties,
|
135
139
|
}
|
136
140
|
|
141
|
+
def _transform(
|
142
|
+
self,
|
143
|
+
properties: Mapping[str, Any],
|
144
|
+
stream_state: StreamState,
|
145
|
+
stream_slice: Optional[StreamSlice] = None,
|
146
|
+
) -> Mapping[str, Any]:
|
147
|
+
for transformation in self.schema_transformations:
|
148
|
+
transformation.transform(
|
149
|
+
properties, # type: ignore # properties has type Mapping[str, Any], but Dict[str, Any] expected
|
150
|
+
config=self.config,
|
151
|
+
)
|
152
|
+
return properties
|
153
|
+
|
137
154
|
def _get_key(
|
138
155
|
self,
|
139
156
|
raw_schema: MutableMapping[str, Any],
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import Any, Dict, Optional
|
7
|
+
|
8
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
9
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
|
13
|
+
class FlattenFields(RecordTransformation):
|
14
|
+
def transform(
|
15
|
+
self,
|
16
|
+
record: Dict[str, Any],
|
17
|
+
config: Optional[Config] = None,
|
18
|
+
stream_state: Optional[StreamState] = None,
|
19
|
+
stream_slice: Optional[StreamSlice] = None,
|
20
|
+
) -> None:
|
21
|
+
transformed_record = self.flatten_record(record)
|
22
|
+
record.clear()
|
23
|
+
record.update(transformed_record)
|
24
|
+
|
25
|
+
def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
|
26
|
+
stack = [(record, "_")]
|
27
|
+
transformed_record: Dict[str, Any] = {}
|
28
|
+
force_with_parent_name = False
|
29
|
+
|
30
|
+
while stack:
|
31
|
+
current_record, parent_key = stack.pop()
|
32
|
+
|
33
|
+
if isinstance(current_record, dict):
|
34
|
+
for current_key, value in current_record.items():
|
35
|
+
new_key = (
|
36
|
+
f"{parent_key}.{current_key}"
|
37
|
+
if (current_key in transformed_record or force_with_parent_name)
|
38
|
+
else current_key
|
39
|
+
)
|
40
|
+
stack.append((value, new_key))
|
41
|
+
|
42
|
+
elif isinstance(current_record, list):
|
43
|
+
for i, item in enumerate(current_record):
|
44
|
+
force_with_parent_name = True
|
45
|
+
stack.append((item, f"{parent_key}.{i}"))
|
46
|
+
|
47
|
+
else:
|
48
|
+
transformed_record[parent_key] = current_record
|
49
|
+
|
50
|
+
return transformed_record
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import re
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import Any, Dict, List, Optional
|
8
|
+
|
9
|
+
import unidecode
|
10
|
+
|
11
|
+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
12
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass
|
16
|
+
class KeysToSnakeCaseTransformation(RecordTransformation):
|
17
|
+
token_pattern: re.Pattern[str] = re.compile(
|
18
|
+
r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
|
19
|
+
)
|
20
|
+
|
21
|
+
def transform(
|
22
|
+
self,
|
23
|
+
record: Dict[str, Any],
|
24
|
+
config: Optional[Config] = None,
|
25
|
+
stream_state: Optional[StreamState] = None,
|
26
|
+
stream_slice: Optional[StreamSlice] = None,
|
27
|
+
) -> None:
|
28
|
+
transformed_record = self._transform_record(record)
|
29
|
+
record.clear()
|
30
|
+
record.update(transformed_record)
|
31
|
+
|
32
|
+
def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
|
33
|
+
transformed_record = {}
|
34
|
+
for key, value in record.items():
|
35
|
+
transformed_key = self.process_key(key)
|
36
|
+
transformed_value = value
|
37
|
+
|
38
|
+
if isinstance(value, dict):
|
39
|
+
transformed_value = self._transform_record(value)
|
40
|
+
|
41
|
+
transformed_record[transformed_key] = transformed_value
|
42
|
+
return transformed_record
|
43
|
+
|
44
|
+
def process_key(self, key: str) -> str:
|
45
|
+
key = self.normalize_key(key)
|
46
|
+
tokens = self.tokenize_key(key)
|
47
|
+
tokens = self.filter_tokens(tokens)
|
48
|
+
return self.tokens_to_snake_case(tokens)
|
49
|
+
|
50
|
+
def normalize_key(self, key: str) -> str:
|
51
|
+
return unidecode.unidecode(key)
|
52
|
+
|
53
|
+
def tokenize_key(self, key: str) -> List[str]:
|
54
|
+
tokens = []
|
55
|
+
for match in self.token_pattern.finditer(key):
|
56
|
+
token = match.group(0) if match.group("NoToken") is None else ""
|
57
|
+
tokens.append(token)
|
58
|
+
return tokens
|
59
|
+
|
60
|
+
def filter_tokens(self, tokens: List[str]) -> List[str]:
|
61
|
+
if len(tokens) >= 3:
|
62
|
+
tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
|
63
|
+
if tokens and tokens[0].isdigit():
|
64
|
+
tokens.insert(0, "")
|
65
|
+
return tokens
|
66
|
+
|
67
|
+
def tokens_to_snake_case(self, tokens: List[str]) -> str:
|
68
|
+
return "_".join(token.lower() for token in tokens)
|
@@ -223,17 +223,17 @@ class Stream(ABC):
|
|
223
223
|
record_counter += 1
|
224
224
|
|
225
225
|
checkpoint_interval = self.state_checkpoint_interval
|
226
|
+
checkpoint = checkpoint_reader.get_checkpoint()
|
226
227
|
if (
|
227
228
|
should_checkpoint
|
228
229
|
and checkpoint_interval
|
229
230
|
and record_counter % checkpoint_interval == 0
|
231
|
+
and checkpoint is not None
|
230
232
|
):
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
)
|
236
|
-
yield airbyte_state_message
|
233
|
+
airbyte_state_message = self._checkpoint_state(
|
234
|
+
checkpoint, state_manager=state_manager
|
235
|
+
)
|
236
|
+
yield airbyte_state_message
|
237
237
|
|
238
238
|
if internal_config.is_limit_reached(record_counter):
|
239
239
|
break
|
@@ -262,7 +262,7 @@ class HttpClient:
|
|
262
262
|
user_backoff_handler = user_defined_backoff_handler(max_tries=max_tries, max_time=max_time)(
|
263
263
|
self._send
|
264
264
|
)
|
265
|
-
rate_limit_backoff_handler = rate_limit_default_backoff_handler()
|
265
|
+
rate_limit_backoff_handler = rate_limit_default_backoff_handler(max_tries=max_tries)
|
266
266
|
backoff_handler = http_client_default_backoff_handler(
|
267
267
|
max_tries=max_tries, max_time=max_time
|
268
268
|
)
|
@@ -472,7 +472,9 @@ class HttpClient:
|
|
472
472
|
|
473
473
|
elif retry_endlessly:
|
474
474
|
raise RateLimitBackoffException(
|
475
|
-
request=request,
|
475
|
+
request=request,
|
476
|
+
response=(response if response is not None else exc),
|
477
|
+
error_message=error_message,
|
476
478
|
)
|
477
479
|
|
478
480
|
raise DefaultBackoffException(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.12.
|
3
|
+
Version: 6.12.2
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -22,6 +22,7 @@ Provides-Extra: sql
|
|
22
22
|
Provides-Extra: vector-db-based
|
23
23
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
24
24
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
25
|
+
Requires-Dist: Unidecode (>=1.3,<2.0)
|
25
26
|
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
26
27
|
Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
|
27
28
|
Requires-Dist: backoff
|
@@ -56,7 +57,7 @@ Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
|
|
56
57
|
Requires-Dist: python-dateutil
|
57
58
|
Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
|
58
59
|
Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
|
59
|
-
Requires-Dist: pytz (==2024.
|
60
|
+
Requires-Dist: pytz (==2024.2)
|
60
61
|
Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
|
61
62
|
Requires-Dist: requests
|
62
63
|
Requires-Dist: requests_cache
|
@@ -66,7 +66,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p268
|
|
66
66
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
67
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
68
68
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
69
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
69
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=wnrMdGjgTS_i-ikm4NNgFgTubtSafytAvkBMHqL94Ao,129417
|
70
70
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
71
71
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
72
72
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
|
@@ -104,13 +104,14 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
|
|
104
104
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
105
105
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
106
106
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
107
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256
|
107
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=kESlZ8F2i2q3BMZfEhwEk0XEzxr7SWtJti6AoPsthg0,90831
|
108
108
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
109
109
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
110
110
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
111
111
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
112
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256
|
113
|
-
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=-9l95OSRfoH2AsCqK0Ub_OE_e504wUYVmhHuVUPfuj0,106067
|
113
|
+
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=974SY1RFwitUCiiDHuFHDGmSNu1D72z3bSTpvlBwAho,911
|
114
|
+
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
|
114
115
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
115
116
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
|
116
117
|
airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
|
@@ -155,14 +156,14 @@ airbyte_cdk/sources/declarative/requesters/requester.py,sha256=iVVpXQ4KEd9OyZNwm
|
|
155
156
|
airbyte_cdk/sources/declarative/resolvers/__init__.py,sha256=RAwq1VrkC0kAaIkmKkL7so8ZeUzF0MgUQ0tciGkY7v4,1116
|
156
157
|
airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb9artL4ZkeqN8RmEykHH6FJgqXD7fCEnh1X0,1936
|
157
158
|
airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=dz4iJV9liD_LzY_Mn4XmAStoUll60R3MIGWV4aN3pgg,5223
|
158
|
-
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=
|
159
|
+
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=AiojNs8wItJFrENZBFUaDvau3sgwudO6Wkra36upSPo,4639
|
159
160
|
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp07VmLKX5AafUlsZWFSrDpUDuJM,443
|
160
|
-
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=
|
161
|
+
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=3jgor7a6_s_9KgqHmPk6cWMDZ-6OugFPjCajIkC3Onw,3721
|
161
162
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
162
163
|
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
|
163
164
|
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
|
164
165
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
|
165
|
-
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=
|
166
|
+
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
|
166
167
|
airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
|
167
168
|
airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
|
168
169
|
airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
|
@@ -173,7 +174,9 @@ airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.p
|
|
173
174
|
airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
|
174
175
|
airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
|
175
176
|
airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=r4YdAuAk2bQtNWJMztIIy2CC-NglD9NeK1s1TeO9wkw,5027
|
177
|
+
airbyte_cdk/sources/declarative/transformations/flatten_fields.py,sha256=ti9fLVk-EpMeDY7ImduvQq1YGounLYmH9dHzp7MIRxk,1703
|
176
178
|
airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py,sha256=RTs5KX4V3hM7A6QN1WlGF21YccTIyNH6qQI9IMb__hw,670
|
179
|
+
airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=43zwe6_F5ba5C4eY0RgXxPz7ndPKZfXGChHepFn-2lk,2263
|
177
180
|
airbyte_cdk/sources/declarative/transformations/remove_fields.py,sha256=EwUP0SZ2p4GRJ6Q8CUzlz9dcUeEidEFDlI2IBye2tlc,2745
|
178
181
|
airbyte_cdk/sources/declarative/transformations/transformation.py,sha256=4sXtx9cNY2EHUPq-xHvDs8GQEBUy3Eo6TkRLKHPXx68,1161
|
179
182
|
airbyte_cdk/sources/declarative/types.py,sha256=yqx0xlZv_76tkC7fqJKefmvl4GJJ8mXbeddwVV8XRJU,778
|
@@ -264,7 +267,7 @@ airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=frPVvHtY7vLxpG
|
|
264
267
|
airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
265
268
|
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=CXHUMOhndu-LOKgsnNTItv5s5qrKpmJDeHOzlH1nBy8,6819
|
266
269
|
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=syjdxEoElIOzqVS5Jrm5FOR70jsbBdttEO_3Iz12Jyo,7523
|
267
|
-
airbyte_cdk/sources/streams/core.py,sha256=
|
270
|
+
airbyte_cdk/sources/streams/core.py,sha256=z4Oi5qmJPjs-RdMd5tPWHvHqIjkcxhkVKTvIpfAs2uA,32211
|
268
271
|
airbyte_cdk/sources/streams/http/__init__.py,sha256=NXaNlkzZMkh5kS8S5ujEaKEE6855sk6_HljF_GFjKZI,311
|
269
272
|
airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=sovoGFThZr-doMN9vJvTuJBrvkwQVIO0qTQO64pGZPY,2428
|
270
273
|
airbyte_cdk/sources/streams/http/error_handlers/__init__.py,sha256=R8OgTcratGH4f6BbYM2Hp8qYyEk7wMYuyda5H9ohGW8,665
|
@@ -278,7 +281,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
|
|
278
281
|
airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
|
279
282
|
airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
|
280
283
|
airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9wdYlhFHv_1Q4,28496
|
281
|
-
airbyte_cdk/sources/streams/http/http_client.py,sha256=
|
284
|
+
airbyte_cdk/sources/streams/http/http_client.py,sha256=tDE0ROtxjGMVphvsw8INvGMtZ97hIF-v47pZ3jIyiwc,23011
|
282
285
|
airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
|
283
286
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
284
287
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
|
@@ -336,8 +339,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
336
339
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
337
340
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
338
341
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
339
|
-
airbyte_cdk-6.12.
|
340
|
-
airbyte_cdk-6.12.
|
341
|
-
airbyte_cdk-6.12.
|
342
|
-
airbyte_cdk-6.12.
|
343
|
-
airbyte_cdk-6.12.
|
342
|
+
airbyte_cdk-6.12.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
343
|
+
airbyte_cdk-6.12.2.dist-info/METADATA,sha256=NscmIGw1p7Qbi33_lKCZ01teTeqBPAX057dVpDDYwmE,5988
|
344
|
+
airbyte_cdk-6.12.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
345
|
+
airbyte_cdk-6.12.2.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
346
|
+
airbyte_cdk-6.12.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|