airbyte-cdk 6.45.0.dev4107__py3-none-any.whl → 6.45.0.post20.dev14369762306__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +45 -6
- airbyte_cdk/connector_builder/main.py +5 -2
- airbyte_cdk/models/__init__.py +0 -1
- airbyte_cdk/models/airbyte_protocol.py +3 -1
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/async_job/job.py +6 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +22 -6
- airbyte_cdk/sources/declarative/checks/__init__.py +5 -2
- airbyte_cdk/sources/declarative/checks/check_stream.py +113 -11
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -8
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +210 -50
- airbyte_cdk/sources/declarative/extractors/record_selector.py +1 -6
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/macros.py +10 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +23 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +142 -43
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +16 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +263 -50
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +150 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +5 -1
- airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +13 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +69 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +58 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +10 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +33 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +19 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +101 -30
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +1 -1
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -9
- airbyte_cdk/sources/declarative/transformations/add_fields.py +3 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +15 -38
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +15 -8
- airbyte_cdk/sources/file_based/schema_helpers.py +1 -9
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +12 -3
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +31 -16
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +3 -1
- airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +4 -0
- airbyte_cdk/sources/types.py +2 -11
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- airbyte_cdk/test/declarative/__init__.py +6 -0
- airbyte_cdk/test/declarative/models/__init__.py +7 -0
- airbyte_cdk/test/declarative/models/scenario.py +74 -0
- airbyte_cdk/test/declarative/test_suites/__init__.py +24 -0
- airbyte_cdk/test/declarative/test_suites/connector_base.py +197 -0
- airbyte_cdk/test/declarative/test_suites/declarative_sources.py +47 -0
- airbyte_cdk/test/declarative/test_suites/destination_base.py +12 -0
- airbyte_cdk/test/declarative/test_suites/source_base.py +129 -0
- airbyte_cdk/test/declarative/utils/__init__.py +0 -0
- airbyte_cdk/test/declarative/utils/job_runner.py +128 -0
- airbyte_cdk/test/entrypoint_wrapper.py +4 -0
- airbyte_cdk/test/fixtures/__init__.py +0 -0
- airbyte_cdk/test/fixtures/auto.py +14 -0
- airbyte_cdk/test/fixtures/general.py +15 -0
- airbyte_cdk/test/mock_http/response_builder.py +0 -8
- airbyte_cdk/test/pytest_config/plugin.py +40 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/RECORD +67 -47
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -89
- airbyte_cdk/sources/file_based/file_record_data.py +0 -22
- airbyte_cdk/sources/utils/files_directory.py +0 -15
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/entry_points.txt +0 -0
@@ -47,7 +47,12 @@ properties:
|
|
47
47
|
max_concurrent_async_job_count:
|
48
48
|
title: Maximum Concurrent Asynchronous Jobs
|
49
49
|
description: Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.
|
50
|
-
type:
|
50
|
+
type:
|
51
|
+
- integer
|
52
|
+
- string
|
53
|
+
examples:
|
54
|
+
- 3
|
55
|
+
- "{{ config['max_concurrent_async_job_count'] }}"
|
51
56
|
metadata:
|
52
57
|
type: object
|
53
58
|
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
@@ -311,7 +316,6 @@ definitions:
|
|
311
316
|
type: object
|
312
317
|
required:
|
313
318
|
- type
|
314
|
-
- stream_names
|
315
319
|
properties:
|
316
320
|
type:
|
317
321
|
type: string
|
@@ -325,6 +329,28 @@ definitions:
|
|
325
329
|
examples:
|
326
330
|
- ["users"]
|
327
331
|
- ["users", "contacts"]
|
332
|
+
dynamic_streams_check_configs:
|
333
|
+
type: array
|
334
|
+
items:
|
335
|
+
"$ref": "#/definitions/DynamicStreamCheckConfig"
|
336
|
+
DynamicStreamCheckConfig:
|
337
|
+
type: object
|
338
|
+
required:
|
339
|
+
- type
|
340
|
+
- dynamic_stream_name
|
341
|
+
properties:
|
342
|
+
type:
|
343
|
+
type: string
|
344
|
+
enum: [DynamicStreamCheckConfig]
|
345
|
+
dynamic_stream_name:
|
346
|
+
title: Dynamic Stream Name
|
347
|
+
description: The dynamic stream name.
|
348
|
+
type: string
|
349
|
+
stream_count:
|
350
|
+
title: Stream Count
|
351
|
+
description: The number of streams to attempt reading from during a check operation. If `stream_count` exceeds the total number of available streams, the minimum of the two values will be used.
|
352
|
+
type: integer
|
353
|
+
default: 0
|
328
354
|
CheckDynamicStream:
|
329
355
|
title: Dynamic Streams to Check
|
330
356
|
description: (This component is experimental. Use at your own risk.) Defines the dynamic streams to try reading when running a check operation.
|
@@ -1422,42 +1448,6 @@ definitions:
|
|
1422
1448
|
- "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
|
1423
1449
|
- "$ref": "#/definitions/CustomStateMigration"
|
1424
1450
|
default: []
|
1425
|
-
file_uploader:
|
1426
|
-
title: File Uploader
|
1427
|
-
description: (experimental) Describes how to fetch a file
|
1428
|
-
type: object
|
1429
|
-
required:
|
1430
|
-
- type
|
1431
|
-
- requester
|
1432
|
-
- download_target_extractor
|
1433
|
-
properties:
|
1434
|
-
type:
|
1435
|
-
type: string
|
1436
|
-
enum: [ FileUploader ]
|
1437
|
-
requester:
|
1438
|
-
description: Requester component that describes how to prepare HTTP requests to send to the source API.
|
1439
|
-
anyOf:
|
1440
|
-
- "$ref": "#/definitions/CustomRequester"
|
1441
|
-
- "$ref": "#/definitions/HttpRequester"
|
1442
|
-
download_target_extractor:
|
1443
|
-
description: Responsible for fetching the URL where the file is located. This is applied to each record and not to the HTTP response
|
1444
|
-
anyOf:
|
1445
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
1446
|
-
- "$ref": "#/definitions/DpathExtractor"
|
1447
|
-
file_extractor:
|
1448
|
-
description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
|
1449
|
-
anyOf:
|
1450
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
1451
|
-
- "$ref": "#/definitions/DpathExtractor"
|
1452
|
-
filename_extractor:
|
1453
|
-
description: Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.
|
1454
|
-
type: string
|
1455
|
-
interpolation_context:
|
1456
|
-
- config
|
1457
|
-
- record
|
1458
|
-
examples:
|
1459
|
-
- "{{ record.id }}/{{ record.file_name }}/"
|
1460
|
-
- "{{ record.id }}_{{ record.file_name }}/"
|
1461
1451
|
$parameters:
|
1462
1452
|
type: object
|
1463
1453
|
additional_properties: true
|
@@ -1762,6 +1752,30 @@ definitions:
|
|
1762
1752
|
$parameters:
|
1763
1753
|
type: object
|
1764
1754
|
additionalProperties: true
|
1755
|
+
GroupByKeyMergeStrategy:
|
1756
|
+
title: Group by Key
|
1757
|
+
description: Record merge strategy that combines records according to fields on the record.
|
1758
|
+
required:
|
1759
|
+
- type
|
1760
|
+
- key
|
1761
|
+
properties:
|
1762
|
+
type:
|
1763
|
+
type: string
|
1764
|
+
enum: [GroupByKeyMergeStrategy]
|
1765
|
+
key:
|
1766
|
+
title: Key
|
1767
|
+
description: The name of the field on the record whose value will be used to group properties that were retrieved through multiple API requests.
|
1768
|
+
anyOf:
|
1769
|
+
- type: string
|
1770
|
+
- type: array
|
1771
|
+
items:
|
1772
|
+
type: string
|
1773
|
+
examples:
|
1774
|
+
- "id"
|
1775
|
+
- ["parent_id", "end_date"]
|
1776
|
+
$parameters:
|
1777
|
+
type: object
|
1778
|
+
additionalProperties: true
|
1765
1779
|
SessionTokenAuthenticator:
|
1766
1780
|
type: object
|
1767
1781
|
required:
|
@@ -1981,7 +1995,9 @@ definitions:
|
|
1981
1995
|
- type: string
|
1982
1996
|
- type: object
|
1983
1997
|
additionalProperties:
|
1984
|
-
|
1998
|
+
anyOf:
|
1999
|
+
- type: string
|
2000
|
+
- "$ref": "#/definitions/QueryProperties"
|
1985
2001
|
interpolation_context:
|
1986
2002
|
- next_page_token
|
1987
2003
|
- stream_interval
|
@@ -2228,7 +2244,8 @@ definitions:
|
|
2228
2244
|
type: object
|
2229
2245
|
additionalProperties: true
|
2230
2246
|
JsonDecoder:
|
2231
|
-
title:
|
2247
|
+
title: JSON
|
2248
|
+
description: Select 'JSON' if the response is formatted as a JSON object.
|
2232
2249
|
type: object
|
2233
2250
|
required:
|
2234
2251
|
- type
|
@@ -2237,8 +2254,8 @@ definitions:
|
|
2237
2254
|
type: string
|
2238
2255
|
enum: [JsonDecoder]
|
2239
2256
|
JsonlDecoder:
|
2240
|
-
title:
|
2241
|
-
description:
|
2257
|
+
title: JSON Lines
|
2258
|
+
description: Select 'JSON Lines' if the response consists of JSON objects separated by new lines ('\n') in JSONL format.
|
2242
2259
|
type: object
|
2243
2260
|
required:
|
2244
2261
|
- type
|
@@ -2363,8 +2380,8 @@ definitions:
|
|
2363
2380
|
type: object
|
2364
2381
|
additionalProperties: true
|
2365
2382
|
IterableDecoder:
|
2366
|
-
title: Iterable
|
2367
|
-
description:
|
2383
|
+
title: Iterable
|
2384
|
+
description: Select 'Iterable' if the response consists of strings separated by new lines (`\n`). The string will then be wrapped into a JSON object with the `record` key.
|
2368
2385
|
type: object
|
2369
2386
|
required:
|
2370
2387
|
- type
|
@@ -2373,8 +2390,8 @@ definitions:
|
|
2373
2390
|
type: string
|
2374
2391
|
enum: [IterableDecoder]
|
2375
2392
|
XmlDecoder:
|
2376
|
-
title: XML
|
2377
|
-
description:
|
2393
|
+
title: XML
|
2394
|
+
description: Select 'XML' if the response consists of XML-formatted data.
|
2378
2395
|
type: object
|
2379
2396
|
required:
|
2380
2397
|
- type
|
@@ -2405,8 +2422,8 @@ definitions:
|
|
2405
2422
|
type: object
|
2406
2423
|
additionalProperties: true
|
2407
2424
|
ZipfileDecoder:
|
2408
|
-
title:
|
2409
|
-
description:
|
2425
|
+
title: ZIP File
|
2426
|
+
description: Select 'ZIP file' for response data that is returned as a zipfile. Requires specifying an inner data type/decoder to parse the unzipped data.
|
2410
2427
|
type: object
|
2411
2428
|
additionalProperties: true
|
2412
2429
|
required:
|
@@ -2930,7 +2947,7 @@ definitions:
|
|
2930
2947
|
title: Lazy Read Pointer
|
2931
2948
|
description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
|
2932
2949
|
type: array
|
2933
|
-
default: [
|
2950
|
+
default: []
|
2934
2951
|
items:
|
2935
2952
|
- type: string
|
2936
2953
|
interpolation_context:
|
@@ -2998,6 +3015,96 @@ definitions:
|
|
2998
3015
|
examples:
|
2999
3016
|
- id
|
3000
3017
|
- ["code", "type"]
|
3018
|
+
PropertiesFromEndpoint:
|
3019
|
+
title: Properties from Endpoint
|
3020
|
+
description: Defines the behavior for fetching the list of properties from an API that will be loaded into the requests to extract records.
|
3021
|
+
type: object
|
3022
|
+
required:
|
3023
|
+
- type
|
3024
|
+
- property_field_path
|
3025
|
+
- retriever
|
3026
|
+
properties:
|
3027
|
+
type:
|
3028
|
+
type: string
|
3029
|
+
enum: [PropertiesFromEndpoint]
|
3030
|
+
property_field_path:
|
3031
|
+
description: Describes the path to the field that should be extracted
|
3032
|
+
type: array
|
3033
|
+
items:
|
3034
|
+
type: string
|
3035
|
+
examples:
|
3036
|
+
- ["name"]
|
3037
|
+
interpolation_context:
|
3038
|
+
- config
|
3039
|
+
- parameters
|
3040
|
+
retriever:
|
3041
|
+
description: Retriever component that describes how to fetch the properties to query from a remote API endpoint.
|
3042
|
+
anyOf:
|
3043
|
+
- "$ref": "#/definitions/CustomRetriever"
|
3044
|
+
- "$ref": "#/definitions/SimpleRetriever"
|
3045
|
+
$parameters:
|
3046
|
+
type: object
|
3047
|
+
additionalProperties: true
|
3048
|
+
PropertyChunking:
|
3049
|
+
title: Property Chunking
|
3050
|
+
description: For APIs with restrictions on the number of properties that can be requested per request, property chunking can be applied to make multiple requests with a subset of the properties.
|
3051
|
+
type: object
|
3052
|
+
required:
|
3053
|
+
- type
|
3054
|
+
- property_limit_type
|
3055
|
+
properties:
|
3056
|
+
type:
|
3057
|
+
type: string
|
3058
|
+
enum: [PropertyChunking]
|
3059
|
+
property_limit_type:
|
3060
|
+
title: Property Limit Type
|
3061
|
+
description: The type used to determine the maximum number of properties per chunk
|
3062
|
+
enum:
|
3063
|
+
- characters
|
3064
|
+
- property_count
|
3065
|
+
property_limit:
|
3066
|
+
title: Property Limit
|
3067
|
+
description: The maximum number of properties that can be retrieved per request according to the limit type.
|
3068
|
+
type: integer
|
3069
|
+
record_merge_strategy:
|
3070
|
+
title: Record Merge Strategy
|
3071
|
+
description: Dictates how records that require multiple requests to get all properties should be emitted to the destination
|
3072
|
+
"$ref": "#/definitions/GroupByKeyMergeStrategy"
|
3073
|
+
$parameters:
|
3074
|
+
type: object
|
3075
|
+
additionalProperties: true
|
3076
|
+
QueryProperties:
|
3077
|
+
title: Query Properties
|
3078
|
+
description: For APIs that require explicit specification of the properties to query for, this component specifies which property fields to request and how they are supplied to outbound requests.
|
3079
|
+
type: object
|
3080
|
+
required:
|
3081
|
+
- type
|
3082
|
+
- property_list
|
3083
|
+
properties:
|
3084
|
+
type:
|
3085
|
+
type: string
|
3086
|
+
enum: [QueryProperties]
|
3087
|
+
property_list:
|
3088
|
+
title: Property List
|
3089
|
+
description: The set of properties that will be queried for in the outbound request. This can either be statically defined or dynamic based on an API endpoint
|
3090
|
+
anyOf:
|
3091
|
+
- type: array
|
3092
|
+
items:
|
3093
|
+
type: string
|
3094
|
+
- "$ref": "#/definitions/PropertiesFromEndpoint"
|
3095
|
+
always_include_properties:
|
3096
|
+
title: Always Include Properties
|
3097
|
+
description: The list of properties that should be included in every set of properties when multiple chunks of properties are being requested.
|
3098
|
+
type: array
|
3099
|
+
items:
|
3100
|
+
type: string
|
3101
|
+
property_chunking:
|
3102
|
+
title: Property Chunking
|
3103
|
+
description: Defines how query properties will be grouped into smaller sets for APIs with limitations on the number of properties fetched per API request.
|
3104
|
+
"$ref": "#/definitions/PropertyChunking"
|
3105
|
+
$parameters:
|
3106
|
+
type: object
|
3107
|
+
additionalProperties: true
|
3001
3108
|
RecordFilter:
|
3002
3109
|
title: Record Filter
|
3003
3110
|
description: Filter applied on a list of records.
|
@@ -3235,7 +3342,7 @@ definitions:
|
|
3235
3342
|
properties:
|
3236
3343
|
type:
|
3237
3344
|
type: string
|
3238
|
-
enum: [
|
3345
|
+
enum: [StateDelegatingStream]
|
3239
3346
|
name:
|
3240
3347
|
title: Name
|
3241
3348
|
description: The stream name.
|
@@ -3290,12 +3397,14 @@ definitions:
|
|
3290
3397
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3291
3398
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3292
3399
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3400
|
+
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3293
3401
|
- type: array
|
3294
3402
|
items:
|
3295
3403
|
anyOf:
|
3296
3404
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3297
3405
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3298
3406
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3407
|
+
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3299
3408
|
decoder:
|
3300
3409
|
title: Decoder
|
3301
3410
|
description: Component decoding the response so records can be extracted.
|
@@ -3312,6 +3421,8 @@ definitions:
|
|
3312
3421
|
type: object
|
3313
3422
|
additionalProperties: true
|
3314
3423
|
GzipDecoder:
|
3424
|
+
title: gzip
|
3425
|
+
description: Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.
|
3315
3426
|
type: object
|
3316
3427
|
required:
|
3317
3428
|
- type
|
@@ -3327,6 +3438,8 @@ definitions:
|
|
3327
3438
|
- "$ref": "#/definitions/JsonDecoder"
|
3328
3439
|
- "$ref": "#/definitions/JsonlDecoder"
|
3329
3440
|
CsvDecoder:
|
3441
|
+
title: CSV
|
3442
|
+
description: "Select 'CSV' for response data that is formatted as CSV (comma-separated values). Can specify an encoding (default: 'utf-8') and a delimiter (default: ',')."
|
3330
3443
|
type: object
|
3331
3444
|
required:
|
3332
3445
|
- type
|
@@ -3457,12 +3570,14 @@ definitions:
|
|
3457
3570
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3458
3571
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3459
3572
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3573
|
+
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3460
3574
|
- type: array
|
3461
3575
|
items:
|
3462
3576
|
anyOf:
|
3463
3577
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3464
3578
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3465
3579
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3580
|
+
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3466
3581
|
decoder:
|
3467
3582
|
title: Decoder
|
3468
3583
|
description: Component decoding the response so records can be extracted.
|
@@ -3579,6 +3694,44 @@ definitions:
|
|
3579
3694
|
$parameters:
|
3580
3695
|
type: object
|
3581
3696
|
additionalProperties: true
|
3697
|
+
GroupingPartitionRouter:
|
3698
|
+
title: Grouping Partition Router
|
3699
|
+
description: >
|
3700
|
+
A decorator on top of a partition router that groups partitions into batches of a specified size.
|
3701
|
+
This is useful for APIs that support filtering by multiple partition keys in a single request.
|
3702
|
+
Note that per-partition incremental syncs may not work as expected because the grouping
|
3703
|
+
of partitions might change between syncs, potentially leading to inconsistent state tracking.
|
3704
|
+
type: object
|
3705
|
+
required:
|
3706
|
+
- type
|
3707
|
+
- group_size
|
3708
|
+
- underlying_partition_router
|
3709
|
+
properties:
|
3710
|
+
type:
|
3711
|
+
type: string
|
3712
|
+
enum: [GroupingPartitionRouter]
|
3713
|
+
group_size:
|
3714
|
+
title: Group Size
|
3715
|
+
description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
|
3716
|
+
type: integer
|
3717
|
+
examples:
|
3718
|
+
- 10
|
3719
|
+
- 50
|
3720
|
+
underlying_partition_router:
|
3721
|
+
title: Underlying Partition Router
|
3722
|
+
description: The partition router whose output will be grouped. This can be any valid partition router component.
|
3723
|
+
anyOf:
|
3724
|
+
- "$ref": "#/definitions/CustomPartitionRouter"
|
3725
|
+
- "$ref": "#/definitions/ListPartitionRouter"
|
3726
|
+
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3727
|
+
deduplicate:
|
3728
|
+
title: Deduplicate Partitions
|
3729
|
+
description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
|
3730
|
+
type: boolean
|
3731
|
+
default: true
|
3732
|
+
$parameters:
|
3733
|
+
type: object
|
3734
|
+
additionalProperties: true
|
3582
3735
|
WaitUntilTimeFromHeader:
|
3583
3736
|
title: Wait Until Time Defined In Response Header
|
3584
3737
|
description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
|
@@ -3750,6 +3903,13 @@ definitions:
|
|
3750
3903
|
type:
|
3751
3904
|
type: string
|
3752
3905
|
enum: [DynamicDeclarativeStream]
|
3906
|
+
name:
|
3907
|
+
title: Name
|
3908
|
+
description: The dynamic stream name.
|
3909
|
+
type: string
|
3910
|
+
default: ""
|
3911
|
+
example:
|
3912
|
+
- "Tables"
|
3753
3913
|
stream_template:
|
3754
3914
|
title: Stream Template
|
3755
3915
|
description: Reference to the stream template.
|
@@ -15,7 +15,6 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
|
|
15
15
|
)
|
16
16
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
17
17
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
18
|
-
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
19
18
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
20
19
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
21
20
|
from airbyte_cdk.sources.utils.transform import TypeTransformer
|
@@ -43,7 +42,6 @@ class RecordSelector(HttpSelector):
|
|
43
42
|
record_filter: Optional[RecordFilter] = None
|
44
43
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
45
44
|
transform_before_filtering: bool = False
|
46
|
-
file_uploader: Optional[FileUploader] = None
|
47
45
|
|
48
46
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
49
47
|
self._parameters = parameters
|
@@ -119,10 +117,7 @@ class RecordSelector(HttpSelector):
|
|
119
117
|
transformed_filtered_data, schema=records_schema
|
120
118
|
)
|
121
119
|
for data in normalized_data:
|
122
|
-
|
123
|
-
if self.file_uploader:
|
124
|
-
self.file_uploader.upload(record)
|
125
|
-
yield record
|
120
|
+
yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
126
121
|
|
127
122
|
def _normalize_by_schema(
|
128
123
|
self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
|
@@ -79,6 +79,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
79
79
|
connector_state_manager: ConnectorStateManager,
|
80
80
|
connector_state_converter: AbstractStreamStateConverter,
|
81
81
|
cursor_field: CursorField,
|
82
|
+
use_global_cursor: bool = False,
|
82
83
|
) -> None:
|
83
84
|
self._global_cursor: Optional[StreamState] = {}
|
84
85
|
self._stream_name = stream_name
|
@@ -106,7 +107,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
106
107
|
self._lookback_window: int = 0
|
107
108
|
self._parent_state: Optional[StreamState] = None
|
108
109
|
self._number_of_partitions: int = 0
|
109
|
-
self._use_global_cursor: bool =
|
110
|
+
self._use_global_cursor: bool = use_global_cursor
|
110
111
|
self._partition_serializer = PerPartitionKeySerializer()
|
111
112
|
# Track the last time a state message was emitted
|
112
113
|
self._last_emission_time: float = 0.0
|
@@ -156,7 +156,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
|
|
156
156
|
|
157
157
|
|
158
158
|
def format_datetime(
|
159
|
-
dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None
|
159
|
+
dt: Union[str, datetime.datetime, int], format: str, input_format: Optional[str] = None
|
160
160
|
) -> str:
|
161
161
|
"""
|
162
162
|
Converts datetime to another format
|
@@ -170,9 +170,15 @@ def format_datetime(
|
|
170
170
|
"""
|
171
171
|
if isinstance(dt, datetime.datetime):
|
172
172
|
return dt.strftime(format)
|
173
|
-
|
174
|
-
|
175
|
-
|
173
|
+
|
174
|
+
if isinstance(dt, int):
|
175
|
+
dt_datetime = DatetimeParser().parse(dt, input_format if input_format else "%s")
|
176
|
+
else:
|
177
|
+
dt_datetime = (
|
178
|
+
datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
|
179
|
+
)
|
180
|
+
if dt_datetime.tzinfo is None:
|
181
|
+
dt_datetime = dt_datetime.replace(tzinfo=pytz.utc)
|
176
182
|
return DatetimeParser().format(dt=dt_datetime, format=format)
|
177
183
|
|
178
184
|
|
@@ -106,6 +106,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
106
106
|
AlwaysLogSliceLogger() if emit_connector_builder_messages else DebugSliceLogger()
|
107
107
|
)
|
108
108
|
|
109
|
+
self._config = config or {}
|
109
110
|
self._validate_source()
|
110
111
|
|
111
112
|
@property
|
@@ -116,6 +117,12 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
116
117
|
def message_repository(self) -> MessageRepository:
|
117
118
|
return self._message_repository
|
118
119
|
|
120
|
+
@property
|
121
|
+
def dynamic_streams(self) -> List[Dict[str, Any]]:
|
122
|
+
return self._dynamic_stream_configs(
|
123
|
+
manifest=self._source_config, config=self._config, with_dynamic_stream_name=True
|
124
|
+
)
|
125
|
+
|
119
126
|
@property
|
120
127
|
def connection_checker(self) -> ConnectionChecker:
|
121
128
|
check = self._source_config["check"]
|
@@ -348,13 +355,16 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
348
355
|
return stream_configs
|
349
356
|
|
350
357
|
def _dynamic_stream_configs(
|
351
|
-
self,
|
358
|
+
self,
|
359
|
+
manifest: Mapping[str, Any],
|
360
|
+
config: Mapping[str, Any],
|
361
|
+
with_dynamic_stream_name: Optional[bool] = None,
|
352
362
|
) -> List[Dict[str, Any]]:
|
353
363
|
dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", [])
|
354
364
|
dynamic_stream_configs: List[Dict[str, Any]] = []
|
355
365
|
seen_dynamic_streams: Set[str] = set()
|
356
366
|
|
357
|
-
for dynamic_definition in dynamic_stream_definitions:
|
367
|
+
for dynamic_definition_index, dynamic_definition in enumerate(dynamic_stream_definitions):
|
358
368
|
components_resolver_config = dynamic_definition["components_resolver"]
|
359
369
|
|
360
370
|
if not components_resolver_config:
|
@@ -387,12 +397,23 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
387
397
|
for dynamic_stream in components_resolver.resolve_components(
|
388
398
|
stream_template_config=stream_template_config
|
389
399
|
):
|
400
|
+
dynamic_stream = {
|
401
|
+
**ManifestComponentTransformer().propagate_types_and_parameters(
|
402
|
+
"", dynamic_stream, {}, use_parent_parameters=True
|
403
|
+
)
|
404
|
+
}
|
405
|
+
|
390
406
|
if "type" not in dynamic_stream:
|
391
407
|
dynamic_stream["type"] = "DeclarativeStream"
|
392
408
|
|
393
409
|
# Ensure that each stream is created with a unique name
|
394
410
|
name = dynamic_stream.get("name")
|
395
411
|
|
412
|
+
if with_dynamic_stream_name:
|
413
|
+
dynamic_stream["dynamic_stream_name"] = dynamic_definition.get(
|
414
|
+
"name", f"dynamic_stream_{dynamic_definition_index}"
|
415
|
+
)
|
416
|
+
|
396
417
|
if not isinstance(name, str):
|
397
418
|
raise ValueError(
|
398
419
|
f"Expected stream name {name} to be a string, got {type(name)}."
|