airbyte-cdk 6.45.0__py3-none-any.whl → 6.45.0.dev4101__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +6 -45
- airbyte_cdk/connector_builder/main.py +2 -5
- airbyte_cdk/models/__init__.py +1 -0
- airbyte_cdk/models/airbyte_protocol.py +1 -3
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/async_job/job.py +0 -6
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
- airbyte_cdk/sources/declarative/checks/__init__.py +2 -5
- airbyte_cdk/sources/declarative/checks/check_stream.py +11 -113
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +50 -210
- airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +4 -8
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -23
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +43 -142
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +4 -16
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +50 -263
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -5
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -25
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +30 -101
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
- airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -3
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +32 -14
- airbyte_cdk/sources/file_based/file_record_data.py +24 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +11 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +0 -1
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +16 -31
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
- airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
- airbyte_cdk/sources/types.py +11 -2
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/RECORD +44 -50
- airbyte_cdk/models/file_transfer_record_message.py +0 -13
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
- airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +0 -13
- airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +0 -40
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +0 -69
- airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +0 -58
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +0 -10
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +0 -33
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +0 -19
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/entry_points.txt +0 -0
@@ -47,12 +47,7 @@ properties:
|
|
47
47
|
max_concurrent_async_job_count:
|
48
48
|
title: Maximum Concurrent Asynchronous Jobs
|
49
49
|
description: Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.
|
50
|
-
type:
|
51
|
-
- integer
|
52
|
-
- string
|
53
|
-
examples:
|
54
|
-
- 3
|
55
|
-
- "{{ config['max_concurrent_async_job_count'] }}"
|
50
|
+
type: integer
|
56
51
|
metadata:
|
57
52
|
type: object
|
58
53
|
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
@@ -316,6 +311,7 @@ definitions:
|
|
316
311
|
type: object
|
317
312
|
required:
|
318
313
|
- type
|
314
|
+
- stream_names
|
319
315
|
properties:
|
320
316
|
type:
|
321
317
|
type: string
|
@@ -329,28 +325,6 @@ definitions:
|
|
329
325
|
examples:
|
330
326
|
- ["users"]
|
331
327
|
- ["users", "contacts"]
|
332
|
-
dynamic_streams_check_configs:
|
333
|
-
type: array
|
334
|
-
items:
|
335
|
-
"$ref": "#/definitions/DynamicStreamCheckConfig"
|
336
|
-
DynamicStreamCheckConfig:
|
337
|
-
type: object
|
338
|
-
required:
|
339
|
-
- type
|
340
|
-
- dynamic_stream_name
|
341
|
-
properties:
|
342
|
-
type:
|
343
|
-
type: string
|
344
|
-
enum: [DynamicStreamCheckConfig]
|
345
|
-
dynamic_stream_name:
|
346
|
-
title: Dynamic Stream Name
|
347
|
-
description: The dynamic stream name.
|
348
|
-
type: string
|
349
|
-
stream_count:
|
350
|
-
title: Stream Count
|
351
|
-
description: The number of streams to attempt reading from during a check operation. If `stream_count` exceeds the total number of available streams, the minimum of the two values will be used.
|
352
|
-
type: integer
|
353
|
-
default: 0
|
354
328
|
CheckDynamicStream:
|
355
329
|
title: Dynamic Streams to Check
|
356
330
|
description: (This component is experimental. Use at your own risk.) Defines the dynamic streams to try reading when running a check operation.
|
@@ -1448,6 +1422,42 @@ definitions:
|
|
1448
1422
|
- "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
|
1449
1423
|
- "$ref": "#/definitions/CustomStateMigration"
|
1450
1424
|
default: []
|
1425
|
+
file_uploader:
|
1426
|
+
title: File Uploader
|
1427
|
+
description: (experimental) Describes how to fetch a file
|
1428
|
+
type: object
|
1429
|
+
required:
|
1430
|
+
- type
|
1431
|
+
- requester
|
1432
|
+
- download_target_extractor
|
1433
|
+
properties:
|
1434
|
+
type:
|
1435
|
+
type: string
|
1436
|
+
enum: [ FileUploader ]
|
1437
|
+
requester:
|
1438
|
+
description: Requester component that describes how to prepare HTTP requests to send to the source API.
|
1439
|
+
anyOf:
|
1440
|
+
- "$ref": "#/definitions/CustomRequester"
|
1441
|
+
- "$ref": "#/definitions/HttpRequester"
|
1442
|
+
download_target_extractor:
|
1443
|
+
description: Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response
|
1444
|
+
anyOf:
|
1445
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1446
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1447
|
+
file_extractor:
|
1448
|
+
description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
|
1449
|
+
anyOf:
|
1450
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1451
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1452
|
+
filename_extractor:
|
1453
|
+
description: Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.
|
1454
|
+
type: string
|
1455
|
+
interpolation_context:
|
1456
|
+
- config
|
1457
|
+
- record
|
1458
|
+
examples:
|
1459
|
+
- "{{ record.id }}/{{ record.file_name }}/"
|
1460
|
+
- "{{ record.id }}_{{ record.file_name }}/"
|
1451
1461
|
$parameters:
|
1452
1462
|
type: object
|
1453
1463
|
additional_properties: true
|
@@ -1752,30 +1762,6 @@ definitions:
|
|
1752
1762
|
$parameters:
|
1753
1763
|
type: object
|
1754
1764
|
additionalProperties: true
|
1755
|
-
GroupByKeyMergeStrategy:
|
1756
|
-
title: Group by Key
|
1757
|
-
description: Record merge strategy that combines records according to fields on the record.
|
1758
|
-
required:
|
1759
|
-
- type
|
1760
|
-
- key
|
1761
|
-
properties:
|
1762
|
-
type:
|
1763
|
-
type: string
|
1764
|
-
enum: [GroupByKeyMergeStrategy]
|
1765
|
-
key:
|
1766
|
-
title: Key
|
1767
|
-
description: The name of the field on the record whose value will be used to group properties that were retrieved through multiple API requests.
|
1768
|
-
anyOf:
|
1769
|
-
- type: string
|
1770
|
-
- type: array
|
1771
|
-
items:
|
1772
|
-
type: string
|
1773
|
-
examples:
|
1774
|
-
- "id"
|
1775
|
-
- ["parent_id", "end_date"]
|
1776
|
-
$parameters:
|
1777
|
-
type: object
|
1778
|
-
additionalProperties: true
|
1779
1765
|
SessionTokenAuthenticator:
|
1780
1766
|
type: object
|
1781
1767
|
required:
|
@@ -1995,9 +1981,7 @@ definitions:
|
|
1995
1981
|
- type: string
|
1996
1982
|
- type: object
|
1997
1983
|
additionalProperties:
|
1998
|
-
|
1999
|
-
- type: string
|
2000
|
-
- "$ref": "#/definitions/QueryProperties"
|
1984
|
+
type: string
|
2001
1985
|
interpolation_context:
|
2002
1986
|
- next_page_token
|
2003
1987
|
- stream_interval
|
@@ -2244,8 +2228,7 @@ definitions:
|
|
2244
2228
|
type: object
|
2245
2229
|
additionalProperties: true
|
2246
2230
|
JsonDecoder:
|
2247
|
-
title:
|
2248
|
-
description: Select 'JSON' if the response is formatted as a JSON object.
|
2231
|
+
title: Json Decoder
|
2249
2232
|
type: object
|
2250
2233
|
required:
|
2251
2234
|
- type
|
@@ -2254,8 +2237,8 @@ definitions:
|
|
2254
2237
|
type: string
|
2255
2238
|
enum: [JsonDecoder]
|
2256
2239
|
JsonlDecoder:
|
2257
|
-
title:
|
2258
|
-
description:
|
2240
|
+
title: JSONL Decoder
|
2241
|
+
description: Use this if the response consists of JSON objects separated by new lines (`\n`) in JSONL format.
|
2259
2242
|
type: object
|
2260
2243
|
required:
|
2261
2244
|
- type
|
@@ -2380,8 +2363,8 @@ definitions:
|
|
2380
2363
|
type: object
|
2381
2364
|
additionalProperties: true
|
2382
2365
|
IterableDecoder:
|
2383
|
-
title: Iterable
|
2384
|
-
description:
|
2366
|
+
title: Iterable Decoder
|
2367
|
+
description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
|
2385
2368
|
type: object
|
2386
2369
|
required:
|
2387
2370
|
- type
|
@@ -2390,8 +2373,8 @@ definitions:
|
|
2390
2373
|
type: string
|
2391
2374
|
enum: [IterableDecoder]
|
2392
2375
|
XmlDecoder:
|
2393
|
-
title: XML
|
2394
|
-
description:
|
2376
|
+
title: XML Decoder
|
2377
|
+
description: Use this if the response is XML.
|
2395
2378
|
type: object
|
2396
2379
|
required:
|
2397
2380
|
- type
|
@@ -2422,8 +2405,8 @@ definitions:
|
|
2422
2405
|
type: object
|
2423
2406
|
additionalProperties: true
|
2424
2407
|
ZipfileDecoder:
|
2425
|
-
title:
|
2426
|
-
description:
|
2408
|
+
title: Zipfile Decoder
|
2409
|
+
description: Decoder for response data that is returned as zipfile(s).
|
2427
2410
|
type: object
|
2428
2411
|
additionalProperties: true
|
2429
2412
|
required:
|
@@ -2947,7 +2930,7 @@ definitions:
|
|
2947
2930
|
title: Lazy Read Pointer
|
2948
2931
|
description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
|
2949
2932
|
type: array
|
2950
|
-
default: []
|
2933
|
+
default: [ ]
|
2951
2934
|
items:
|
2952
2935
|
- type: string
|
2953
2936
|
interpolation_context:
|
@@ -3015,96 +2998,6 @@ definitions:
|
|
3015
2998
|
examples:
|
3016
2999
|
- id
|
3017
3000
|
- ["code", "type"]
|
3018
|
-
PropertiesFromEndpoint:
|
3019
|
-
title: Properties from Endpoint
|
3020
|
-
description: Defines the behavior for fetching the list of properties from an API that will be loaded into the requests to extract records.
|
3021
|
-
type: object
|
3022
|
-
required:
|
3023
|
-
- type
|
3024
|
-
- property_field_path
|
3025
|
-
- retriever
|
3026
|
-
properties:
|
3027
|
-
type:
|
3028
|
-
type: string
|
3029
|
-
enum: [PropertiesFromEndpoint]
|
3030
|
-
property_field_path:
|
3031
|
-
description: Describes the path to the field that should be extracted
|
3032
|
-
type: array
|
3033
|
-
items:
|
3034
|
-
type: string
|
3035
|
-
examples:
|
3036
|
-
- ["name"]
|
3037
|
-
interpolation_context:
|
3038
|
-
- config
|
3039
|
-
- parameters
|
3040
|
-
retriever:
|
3041
|
-
description: Requester component that describes how to fetch the properties to query from a remote API endpoint.
|
3042
|
-
anyOf:
|
3043
|
-
- "$ref": "#/definitions/CustomRetriever"
|
3044
|
-
- "$ref": "#/definitions/SimpleRetriever"
|
3045
|
-
$parameters:
|
3046
|
-
type: object
|
3047
|
-
additionalProperties: true
|
3048
|
-
PropertyChunking:
|
3049
|
-
title: Property Chunking
|
3050
|
-
description: For APIs with restrictions on the amount of properties that can be requester per request, property chunking can be applied to make multiple requests with a subset of the properties.
|
3051
|
-
type: object
|
3052
|
-
required:
|
3053
|
-
- type
|
3054
|
-
- property_limit_type
|
3055
|
-
properties:
|
3056
|
-
type:
|
3057
|
-
type: string
|
3058
|
-
enum: [PropertyChunking]
|
3059
|
-
property_limit_type:
|
3060
|
-
title: Property Limit Type
|
3061
|
-
description: The type used to determine the maximum number of properties per chunk
|
3062
|
-
enum:
|
3063
|
-
- characters
|
3064
|
-
- property_count
|
3065
|
-
property_limit:
|
3066
|
-
title: Property Limit
|
3067
|
-
description: The maximum amount of properties that can be retrieved per request according to the limit type.
|
3068
|
-
type: integer
|
3069
|
-
record_merge_strategy:
|
3070
|
-
title: Record Merge Strategy
|
3071
|
-
description: Dictates how to records that require multiple requests to get all properties should be emitted to the destination
|
3072
|
-
"$ref": "#/definitions/GroupByKeyMergeStrategy"
|
3073
|
-
$parameters:
|
3074
|
-
type: object
|
3075
|
-
additionalProperties: true
|
3076
|
-
QueryProperties:
|
3077
|
-
title: Query Properties
|
3078
|
-
description: For APIs that require explicit specification of the properties to query for, this component specifies which property fields and how they are supplied to outbound requests.
|
3079
|
-
type: object
|
3080
|
-
required:
|
3081
|
-
- type
|
3082
|
-
- property_list
|
3083
|
-
properties:
|
3084
|
-
type:
|
3085
|
-
type: string
|
3086
|
-
enum: [QueryProperties]
|
3087
|
-
property_list:
|
3088
|
-
title: Property List
|
3089
|
-
description: The set of properties that will be queried for in the outbound request. This can either be statically defined or dynamic based on an API endpoint
|
3090
|
-
anyOf:
|
3091
|
-
- type: array
|
3092
|
-
items:
|
3093
|
-
type: string
|
3094
|
-
- "$ref": "#/definitions/PropertiesFromEndpoint"
|
3095
|
-
always_include_properties:
|
3096
|
-
title: Always Include Properties
|
3097
|
-
description: The list of properties that should be included in every set of properties when multiple chunks of properties are being requested.
|
3098
|
-
type: array
|
3099
|
-
items:
|
3100
|
-
type: string
|
3101
|
-
property_chunking:
|
3102
|
-
title: Property Chunking
|
3103
|
-
description: Defines how query properties will be grouped into smaller sets for APIs with limitations on the number of properties fetched per API request.
|
3104
|
-
"$ref": "#/definitions/PropertyChunking"
|
3105
|
-
$parameters:
|
3106
|
-
type: object
|
3107
|
-
additionalProperties: true
|
3108
3001
|
RecordFilter:
|
3109
3002
|
title: Record Filter
|
3110
3003
|
description: Filter applied on a list of records.
|
@@ -3342,7 +3235,7 @@ definitions:
|
|
3342
3235
|
properties:
|
3343
3236
|
type:
|
3344
3237
|
type: string
|
3345
|
-
enum: [StateDelegatingStream]
|
3238
|
+
enum: [ StateDelegatingStream ]
|
3346
3239
|
name:
|
3347
3240
|
title: Name
|
3348
3241
|
description: The stream name.
|
@@ -3397,14 +3290,12 @@ definitions:
|
|
3397
3290
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3398
3291
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3399
3292
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3400
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3401
3293
|
- type: array
|
3402
3294
|
items:
|
3403
3295
|
anyOf:
|
3404
3296
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3405
3297
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3406
3298
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3407
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3408
3299
|
decoder:
|
3409
3300
|
title: Decoder
|
3410
3301
|
description: Component decoding the response so records can be extracted.
|
@@ -3421,8 +3312,6 @@ definitions:
|
|
3421
3312
|
type: object
|
3422
3313
|
additionalProperties: true
|
3423
3314
|
GzipDecoder:
|
3424
|
-
title: gzip
|
3425
|
-
description: Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.
|
3426
3315
|
type: object
|
3427
3316
|
required:
|
3428
3317
|
- type
|
@@ -3438,8 +3327,6 @@ definitions:
|
|
3438
3327
|
- "$ref": "#/definitions/JsonDecoder"
|
3439
3328
|
- "$ref": "#/definitions/JsonlDecoder"
|
3440
3329
|
CsvDecoder:
|
3441
|
-
title: CSV
|
3442
|
-
description: "Select 'CSV' for response data that is formatted as CSV (comma-separated values). Can specify an encoding (default: 'utf-8') and a delimiter (default: ',')."
|
3443
3330
|
type: object
|
3444
3331
|
required:
|
3445
3332
|
- type
|
@@ -3570,14 +3457,12 @@ definitions:
|
|
3570
3457
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3571
3458
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3572
3459
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3573
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3574
3460
|
- type: array
|
3575
3461
|
items:
|
3576
3462
|
anyOf:
|
3577
3463
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3578
3464
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3579
3465
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3580
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3581
3466
|
decoder:
|
3582
3467
|
title: Decoder
|
3583
3468
|
description: Component decoding the response so records can be extracted.
|
@@ -3694,44 +3579,6 @@ definitions:
|
|
3694
3579
|
$parameters:
|
3695
3580
|
type: object
|
3696
3581
|
additionalProperties: true
|
3697
|
-
GroupingPartitionRouter:
|
3698
|
-
title: Grouping Partition Router
|
3699
|
-
description: >
|
3700
|
-
A decorator on top of a partition router that groups partitions into batches of a specified size.
|
3701
|
-
This is useful for APIs that support filtering by multiple partition keys in a single request.
|
3702
|
-
Note that per-partition incremental syncs may not work as expected because the grouping
|
3703
|
-
of partitions might change between syncs, potentially leading to inconsistent state tracking.
|
3704
|
-
type: object
|
3705
|
-
required:
|
3706
|
-
- type
|
3707
|
-
- group_size
|
3708
|
-
- underlying_partition_router
|
3709
|
-
properties:
|
3710
|
-
type:
|
3711
|
-
type: string
|
3712
|
-
enum: [GroupingPartitionRouter]
|
3713
|
-
group_size:
|
3714
|
-
title: Group Size
|
3715
|
-
description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
|
3716
|
-
type: integer
|
3717
|
-
examples:
|
3718
|
-
- 10
|
3719
|
-
- 50
|
3720
|
-
underlying_partition_router:
|
3721
|
-
title: Underlying Partition Router
|
3722
|
-
description: The partition router whose output will be grouped. This can be any valid partition router component.
|
3723
|
-
anyOf:
|
3724
|
-
- "$ref": "#/definitions/CustomPartitionRouter"
|
3725
|
-
- "$ref": "#/definitions/ListPartitionRouter"
|
3726
|
-
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3727
|
-
deduplicate:
|
3728
|
-
title: Deduplicate Partitions
|
3729
|
-
description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
|
3730
|
-
type: boolean
|
3731
|
-
default: true
|
3732
|
-
$parameters:
|
3733
|
-
type: object
|
3734
|
-
additionalProperties: true
|
3735
3582
|
WaitUntilTimeFromHeader:
|
3736
3583
|
title: Wait Until Time Defined In Response Header
|
3737
3584
|
description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
|
@@ -3903,13 +3750,6 @@ definitions:
|
|
3903
3750
|
type:
|
3904
3751
|
type: string
|
3905
3752
|
enum: [DynamicDeclarativeStream]
|
3906
|
-
name:
|
3907
|
-
title: Name
|
3908
|
-
description: The dynamic stream name.
|
3909
|
-
type: string
|
3910
|
-
default: ""
|
3911
|
-
example:
|
3912
|
-
- "Tables"
|
3913
3753
|
stream_template:
|
3914
3754
|
title: Stream Template
|
3915
3755
|
description: Reference to the stream template.
|
@@ -15,6 +15,7 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
|
|
15
15
|
)
|
16
16
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
17
17
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
18
|
+
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
18
19
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
19
20
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
20
21
|
from airbyte_cdk.sources.utils.transform import TypeTransformer
|
@@ -42,6 +43,7 @@ class RecordSelector(HttpSelector):
|
|
42
43
|
record_filter: Optional[RecordFilter] = None
|
43
44
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
44
45
|
transform_before_filtering: bool = False
|
46
|
+
file_uploader: Optional[FileUploader] = None
|
45
47
|
|
46
48
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
47
49
|
self._parameters = parameters
|
@@ -117,7 +119,10 @@ class RecordSelector(HttpSelector):
|
|
117
119
|
transformed_filtered_data, schema=records_schema
|
118
120
|
)
|
119
121
|
for data in normalized_data:
|
120
|
-
|
122
|
+
record = Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
123
|
+
if self.file_uploader:
|
124
|
+
self.file_uploader.upload(record)
|
125
|
+
yield record
|
121
126
|
|
122
127
|
def _normalize_by_schema(
|
123
128
|
self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
|
@@ -79,7 +79,6 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
79
79
|
connector_state_manager: ConnectorStateManager,
|
80
80
|
connector_state_converter: AbstractStreamStateConverter,
|
81
81
|
cursor_field: CursorField,
|
82
|
-
use_global_cursor: bool = False,
|
83
82
|
) -> None:
|
84
83
|
self._global_cursor: Optional[StreamState] = {}
|
85
84
|
self._stream_name = stream_name
|
@@ -107,7 +106,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
107
106
|
self._lookback_window: int = 0
|
108
107
|
self._parent_state: Optional[StreamState] = None
|
109
108
|
self._number_of_partitions: int = 0
|
110
|
-
self._use_global_cursor: bool = use_global_cursor
|
109
|
+
self._use_global_cursor: bool = False
|
111
110
|
self._partition_serializer = PerPartitionKeySerializer()
|
112
111
|
# Track the last time a state message was emitted
|
113
112
|
self._last_emission_time: float = 0.0
|
@@ -156,7 +156,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
|
|
156
156
|
|
157
157
|
|
158
158
|
def format_datetime(
|
159
|
-
dt: Union[str, datetime.datetime, int], format: str, input_format: Optional[str] = None
|
159
|
+
dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None
|
160
160
|
) -> str:
|
161
161
|
"""
|
162
162
|
Converts datetime to another format
|
@@ -170,13 +170,9 @@ def format_datetime(
|
|
170
170
|
"""
|
171
171
|
if isinstance(dt, datetime.datetime):
|
172
172
|
return dt.strftime(format)
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
else:
|
177
|
-
dt_datetime = (
|
178
|
-
datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
|
179
|
-
)
|
173
|
+
dt_datetime = (
|
174
|
+
datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
|
175
|
+
)
|
180
176
|
return DatetimeParser().format(dt=dt_datetime, format=format)
|
181
177
|
|
182
178
|
|
@@ -106,7 +106,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
106
106
|
AlwaysLogSliceLogger() if emit_connector_builder_messages else DebugSliceLogger()
|
107
107
|
)
|
108
108
|
|
109
|
-
self._config = config or {}
|
110
109
|
self._validate_source()
|
111
110
|
|
112
111
|
@property
|
@@ -117,12 +116,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
117
116
|
def message_repository(self) -> MessageRepository:
|
118
117
|
return self._message_repository
|
119
118
|
|
120
|
-
@property
|
121
|
-
def dynamic_streams(self) -> List[Dict[str, Any]]:
|
122
|
-
return self._dynamic_stream_configs(
|
123
|
-
manifest=self._source_config, config=self._config, with_dynamic_stream_name=True
|
124
|
-
)
|
125
|
-
|
126
119
|
@property
|
127
120
|
def connection_checker(self) -> ConnectionChecker:
|
128
121
|
check = self._source_config["check"]
|
@@ -355,16 +348,13 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
355
348
|
return stream_configs
|
356
349
|
|
357
350
|
def _dynamic_stream_configs(
|
358
|
-
self,
|
359
|
-
manifest: Mapping[str, Any],
|
360
|
-
config: Mapping[str, Any],
|
361
|
-
with_dynamic_stream_name: Optional[bool] = None,
|
351
|
+
self, manifest: Mapping[str, Any], config: Mapping[str, Any]
|
362
352
|
) -> List[Dict[str, Any]]:
|
363
353
|
dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", [])
|
364
354
|
dynamic_stream_configs: List[Dict[str, Any]] = []
|
365
355
|
seen_dynamic_streams: Set[str] = set()
|
366
356
|
|
367
|
-
for dynamic_definition_index, dynamic_definition in enumerate(dynamic_stream_definitions):
|
357
|
+
for dynamic_definition in dynamic_stream_definitions:
|
368
358
|
components_resolver_config = dynamic_definition["components_resolver"]
|
369
359
|
|
370
360
|
if not components_resolver_config:
|
@@ -397,23 +387,12 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
397
387
|
for dynamic_stream in components_resolver.resolve_components(
|
398
388
|
stream_template_config=stream_template_config
|
399
389
|
):
|
400
|
-
dynamic_stream = {
|
401
|
-
**ManifestComponentTransformer().propagate_types_and_parameters(
|
402
|
-
"", dynamic_stream, {}, use_parent_parameters=True
|
403
|
-
)
|
404
|
-
}
|
405
|
-
|
406
390
|
if "type" not in dynamic_stream:
|
407
391
|
dynamic_stream["type"] = "DeclarativeStream"
|
408
392
|
|
409
393
|
# Ensure that each stream is created with a unique name
|
410
394
|
name = dynamic_stream.get("name")
|
411
395
|
|
412
|
-
if with_dynamic_stream_name:
|
413
|
-
dynamic_stream["dynamic_stream_name"] = dynamic_definition.get(
|
414
|
-
"name", f"dynamic_stream_{dynamic_definition_index}"
|
415
|
-
)
|
416
|
-
|
417
396
|
if not isinstance(name, str):
|
418
397
|
raise ValueError(
|
419
398
|
f"Expected stream name {name} to be a string, got {type(name)}."
|