airbyte-cdk 6.44.0__py3-none-any.whl → 6.45.0.dev4100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +6 -45
- airbyte_cdk/connector_builder/main.py +2 -5
- airbyte_cdk/models/__init__.py +1 -0
- airbyte_cdk/models/airbyte_protocol.py +1 -3
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/async_job/job.py +0 -6
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
- airbyte_cdk/sources/declarative/checks/__init__.py +2 -5
- airbyte_cdk/sources/declarative/checks/check_stream.py +11 -113
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +49 -93
- airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +4 -8
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -23
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +42 -68
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +4 -16
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +42 -83
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -5
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
- airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -3
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +9 -9
- airbyte_cdk/sources/file_based/file_record_data.py +24 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +11 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +0 -1
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +16 -31
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
- airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
- airbyte_cdk/sources/types.py +11 -2
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/RECORD +42 -41
- airbyte_cdk/models/file_transfer_record_message.py +0 -13
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
- {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/entry_points.txt +0 -0
@@ -47,12 +47,7 @@ properties:
|
|
47
47
|
max_concurrent_async_job_count:
|
48
48
|
title: Maximum Concurrent Asynchronous Jobs
|
49
49
|
description: Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.
|
50
|
-
type:
|
51
|
-
- integer
|
52
|
-
- string
|
53
|
-
examples:
|
54
|
-
- 3
|
55
|
-
- "{{ config['max_concurrent_async_job_count'] }}"
|
50
|
+
type: integer
|
56
51
|
metadata:
|
57
52
|
type: object
|
58
53
|
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
@@ -316,6 +311,7 @@ definitions:
|
|
316
311
|
type: object
|
317
312
|
required:
|
318
313
|
- type
|
314
|
+
- stream_names
|
319
315
|
properties:
|
320
316
|
type:
|
321
317
|
type: string
|
@@ -329,28 +325,6 @@ definitions:
|
|
329
325
|
examples:
|
330
326
|
- ["users"]
|
331
327
|
- ["users", "contacts"]
|
332
|
-
dynamic_streams_check_configs:
|
333
|
-
type: array
|
334
|
-
items:
|
335
|
-
"$ref": "#/definitions/DynamicStreamCheckConfig"
|
336
|
-
DynamicStreamCheckConfig:
|
337
|
-
type: object
|
338
|
-
required:
|
339
|
-
- type
|
340
|
-
- dynamic_stream_name
|
341
|
-
properties:
|
342
|
-
type:
|
343
|
-
type: string
|
344
|
-
enum: [ DynamicStreamCheckConfig ]
|
345
|
-
dynamic_stream_name:
|
346
|
-
title: Dynamic Stream Name
|
347
|
-
description: The dynamic stream name.
|
348
|
-
type: string
|
349
|
-
stream_count:
|
350
|
-
title: Stream Count
|
351
|
-
description: The number of streams to attempt reading from during a check operation. If `stream_count` exceeds the total number of available streams, the minimum of the two values will be used.
|
352
|
-
type: integer
|
353
|
-
default: 0
|
354
328
|
CheckDynamicStream:
|
355
329
|
title: Dynamic Streams to Check
|
356
330
|
description: (This component is experimental. Use at your own risk.) Defines the dynamic streams to try reading when running a check operation.
|
@@ -1448,6 +1422,42 @@ definitions:
|
|
1448
1422
|
- "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
|
1449
1423
|
- "$ref": "#/definitions/CustomStateMigration"
|
1450
1424
|
default: []
|
1425
|
+
file_uploader:
|
1426
|
+
title: File Uploader
|
1427
|
+
description: (experimental) Describes how to fetch a file
|
1428
|
+
type: object
|
1429
|
+
required:
|
1430
|
+
- type
|
1431
|
+
- requester
|
1432
|
+
- download_target_extractor
|
1433
|
+
properties:
|
1434
|
+
type:
|
1435
|
+
type: string
|
1436
|
+
enum: [ FileUploader ]
|
1437
|
+
requester:
|
1438
|
+
description: Requester component that describes how to prepare HTTP requests to send to the source API.
|
1439
|
+
anyOf:
|
1440
|
+
- "$ref": "#/definitions/CustomRequester"
|
1441
|
+
- "$ref": "#/definitions/HttpRequester"
|
1442
|
+
download_target_extractor:
|
1443
|
+
description: Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response
|
1444
|
+
anyOf:
|
1445
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1446
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1447
|
+
file_extractor:
|
1448
|
+
description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
|
1449
|
+
anyOf:
|
1450
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1451
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1452
|
+
filename_extractor:
|
1453
|
+
description: Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.
|
1454
|
+
type: string
|
1455
|
+
interpolation_context:
|
1456
|
+
- config
|
1457
|
+
- record
|
1458
|
+
examples:
|
1459
|
+
- "{{ record.id }}/{{ record.file_name }}/"
|
1460
|
+
- "{{ record.id }}_{{ record.file_name }}/"
|
1451
1461
|
$parameters:
|
1452
1462
|
type: object
|
1453
1463
|
additional_properties: true
|
@@ -2218,8 +2228,7 @@ definitions:
|
|
2218
2228
|
type: object
|
2219
2229
|
additionalProperties: true
|
2220
2230
|
JsonDecoder:
|
2221
|
-
title:
|
2222
|
-
description: Select 'JSON' if the response is formatted as a JSON object.
|
2231
|
+
title: Json Decoder
|
2223
2232
|
type: object
|
2224
2233
|
required:
|
2225
2234
|
- type
|
@@ -2228,8 +2237,8 @@ definitions:
|
|
2228
2237
|
type: string
|
2229
2238
|
enum: [JsonDecoder]
|
2230
2239
|
JsonlDecoder:
|
2231
|
-
title:
|
2232
|
-
description:
|
2240
|
+
title: JSONL Decoder
|
2241
|
+
description: Use this if the response consists of JSON objects separated by new lines (`\n`) in JSONL format.
|
2233
2242
|
type: object
|
2234
2243
|
required:
|
2235
2244
|
- type
|
@@ -2354,8 +2363,8 @@ definitions:
|
|
2354
2363
|
type: object
|
2355
2364
|
additionalProperties: true
|
2356
2365
|
IterableDecoder:
|
2357
|
-
title: Iterable
|
2358
|
-
description:
|
2366
|
+
title: Iterable Decoder
|
2367
|
+
description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
|
2359
2368
|
type: object
|
2360
2369
|
required:
|
2361
2370
|
- type
|
@@ -2364,8 +2373,8 @@ definitions:
|
|
2364
2373
|
type: string
|
2365
2374
|
enum: [IterableDecoder]
|
2366
2375
|
XmlDecoder:
|
2367
|
-
title: XML
|
2368
|
-
description:
|
2376
|
+
title: XML Decoder
|
2377
|
+
description: Use this if the response is XML.
|
2369
2378
|
type: object
|
2370
2379
|
required:
|
2371
2380
|
- type
|
@@ -2396,8 +2405,8 @@ definitions:
|
|
2396
2405
|
type: object
|
2397
2406
|
additionalProperties: true
|
2398
2407
|
ZipfileDecoder:
|
2399
|
-
title:
|
2400
|
-
description:
|
2408
|
+
title: Zipfile Decoder
|
2409
|
+
description: Decoder for response data that is returned as zipfile(s).
|
2401
2410
|
type: object
|
2402
2411
|
additionalProperties: true
|
2403
2412
|
required:
|
@@ -2921,7 +2930,7 @@ definitions:
|
|
2921
2930
|
title: Lazy Read Pointer
|
2922
2931
|
description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
|
2923
2932
|
type: array
|
2924
|
-
default: []
|
2933
|
+
default: [ ]
|
2925
2934
|
items:
|
2926
2935
|
- type: string
|
2927
2936
|
interpolation_context:
|
@@ -3226,7 +3235,7 @@ definitions:
|
|
3226
3235
|
properties:
|
3227
3236
|
type:
|
3228
3237
|
type: string
|
3229
|
-
enum: [StateDelegatingStream]
|
3238
|
+
enum: [ StateDelegatingStream ]
|
3230
3239
|
name:
|
3231
3240
|
title: Name
|
3232
3241
|
description: The stream name.
|
@@ -3281,14 +3290,12 @@ definitions:
|
|
3281
3290
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3282
3291
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3283
3292
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3284
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3285
3293
|
- type: array
|
3286
3294
|
items:
|
3287
3295
|
anyOf:
|
3288
3296
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3289
3297
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3290
3298
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3291
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3292
3299
|
decoder:
|
3293
3300
|
title: Decoder
|
3294
3301
|
description: Component decoding the response so records can be extracted.
|
@@ -3305,8 +3312,6 @@ definitions:
|
|
3305
3312
|
type: object
|
3306
3313
|
additionalProperties: true
|
3307
3314
|
GzipDecoder:
|
3308
|
-
title: gzip
|
3309
|
-
description: Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.
|
3310
3315
|
type: object
|
3311
3316
|
required:
|
3312
3317
|
- type
|
@@ -3322,8 +3327,6 @@ definitions:
|
|
3322
3327
|
- "$ref": "#/definitions/JsonDecoder"
|
3323
3328
|
- "$ref": "#/definitions/JsonlDecoder"
|
3324
3329
|
CsvDecoder:
|
3325
|
-
title: CSV
|
3326
|
-
description: "Select 'CSV' for response data that is formatted as CSV (comma-separated values). Can specify an encoding (default: 'utf-8') and a delimiter (default: ',')."
|
3327
3330
|
type: object
|
3328
3331
|
required:
|
3329
3332
|
- type
|
@@ -3454,14 +3457,12 @@ definitions:
|
|
3454
3457
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3455
3458
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3456
3459
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3457
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3458
3460
|
- type: array
|
3459
3461
|
items:
|
3460
3462
|
anyOf:
|
3461
3463
|
- "$ref": "#/definitions/CustomPartitionRouter"
|
3462
3464
|
- "$ref": "#/definitions/ListPartitionRouter"
|
3463
3465
|
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3464
|
-
- "$ref": "#/definitions/GroupingPartitionRouter"
|
3465
3466
|
decoder:
|
3466
3467
|
title: Decoder
|
3467
3468
|
description: Component decoding the response so records can be extracted.
|
@@ -3578,44 +3579,6 @@ definitions:
|
|
3578
3579
|
$parameters:
|
3579
3580
|
type: object
|
3580
3581
|
additionalProperties: true
|
3581
|
-
GroupingPartitionRouter:
|
3582
|
-
title: Grouping Partition Router
|
3583
|
-
description: >
|
3584
|
-
A decorator on top of a partition router that groups partitions into batches of a specified size.
|
3585
|
-
This is useful for APIs that support filtering by multiple partition keys in a single request.
|
3586
|
-
Note that per-partition incremental syncs may not work as expected because the grouping
|
3587
|
-
of partitions might change between syncs, potentially leading to inconsistent state tracking.
|
3588
|
-
type: object
|
3589
|
-
required:
|
3590
|
-
- type
|
3591
|
-
- group_size
|
3592
|
-
- underlying_partition_router
|
3593
|
-
properties:
|
3594
|
-
type:
|
3595
|
-
type: string
|
3596
|
-
enum: [GroupingPartitionRouter]
|
3597
|
-
group_size:
|
3598
|
-
title: Group Size
|
3599
|
-
description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
|
3600
|
-
type: integer
|
3601
|
-
examples:
|
3602
|
-
- 10
|
3603
|
-
- 50
|
3604
|
-
underlying_partition_router:
|
3605
|
-
title: Underlying Partition Router
|
3606
|
-
description: The partition router whose output will be grouped. This can be any valid partition router component.
|
3607
|
-
anyOf:
|
3608
|
-
- "$ref": "#/definitions/CustomPartitionRouter"
|
3609
|
-
- "$ref": "#/definitions/ListPartitionRouter"
|
3610
|
-
- "$ref": "#/definitions/SubstreamPartitionRouter"
|
3611
|
-
deduplicate:
|
3612
|
-
title: Deduplicate Partitions
|
3613
|
-
description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
|
3614
|
-
type: boolean
|
3615
|
-
default: true
|
3616
|
-
$parameters:
|
3617
|
-
type: object
|
3618
|
-
additionalProperties: true
|
3619
3582
|
WaitUntilTimeFromHeader:
|
3620
3583
|
title: Wait Until Time Defined In Response Header
|
3621
3584
|
description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
|
@@ -3787,13 +3750,6 @@ definitions:
|
|
3787
3750
|
type:
|
3788
3751
|
type: string
|
3789
3752
|
enum: [DynamicDeclarativeStream]
|
3790
|
-
name:
|
3791
|
-
title: Name
|
3792
|
-
description: The dynamic stream name.
|
3793
|
-
type: string
|
3794
|
-
default: ""
|
3795
|
-
example:
|
3796
|
-
- "Tables"
|
3797
3753
|
stream_template:
|
3798
3754
|
title: Stream Template
|
3799
3755
|
description: Reference to the stream template.
|
@@ -15,6 +15,7 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
|
|
15
15
|
)
|
16
16
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
17
17
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
18
|
+
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
18
19
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
19
20
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
20
21
|
from airbyte_cdk.sources.utils.transform import TypeTransformer
|
@@ -42,6 +43,7 @@ class RecordSelector(HttpSelector):
|
|
42
43
|
record_filter: Optional[RecordFilter] = None
|
43
44
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
44
45
|
transform_before_filtering: bool = False
|
46
|
+
file_uploader: Optional[FileUploader] = None
|
45
47
|
|
46
48
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
47
49
|
self._parameters = parameters
|
@@ -117,7 +119,10 @@ class RecordSelector(HttpSelector):
|
|
117
119
|
transformed_filtered_data, schema=records_schema
|
118
120
|
)
|
119
121
|
for data in normalized_data:
|
120
|
-
|
122
|
+
record = Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
123
|
+
if self.file_uploader:
|
124
|
+
self.file_uploader.upload(record)
|
125
|
+
yield record
|
121
126
|
|
122
127
|
def _normalize_by_schema(
|
123
128
|
self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
|
@@ -79,7 +79,6 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
79
79
|
connector_state_manager: ConnectorStateManager,
|
80
80
|
connector_state_converter: AbstractStreamStateConverter,
|
81
81
|
cursor_field: CursorField,
|
82
|
-
use_global_cursor: bool = False,
|
83
82
|
) -> None:
|
84
83
|
self._global_cursor: Optional[StreamState] = {}
|
85
84
|
self._stream_name = stream_name
|
@@ -107,7 +106,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
107
106
|
self._lookback_window: int = 0
|
108
107
|
self._parent_state: Optional[StreamState] = None
|
109
108
|
self._number_of_partitions: int = 0
|
110
|
-
self._use_global_cursor: bool =
|
109
|
+
self._use_global_cursor: bool = False
|
111
110
|
self._partition_serializer = PerPartitionKeySerializer()
|
112
111
|
# Track the last time a state message was emitted
|
113
112
|
self._last_emission_time: float = 0.0
|
@@ -156,7 +156,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
|
|
156
156
|
|
157
157
|
|
158
158
|
def format_datetime(
|
159
|
-
dt: Union[str, datetime.datetime
|
159
|
+
dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None
|
160
160
|
) -> str:
|
161
161
|
"""
|
162
162
|
Converts datetime to another format
|
@@ -170,13 +170,9 @@ def format_datetime(
|
|
170
170
|
"""
|
171
171
|
if isinstance(dt, datetime.datetime):
|
172
172
|
return dt.strftime(format)
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
else:
|
177
|
-
dt_datetime = (
|
178
|
-
datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
|
179
|
-
)
|
173
|
+
dt_datetime = (
|
174
|
+
datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
|
175
|
+
)
|
180
176
|
return DatetimeParser().format(dt=dt_datetime, format=format)
|
181
177
|
|
182
178
|
|
@@ -106,7 +106,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
106
106
|
AlwaysLogSliceLogger() if emit_connector_builder_messages else DebugSliceLogger()
|
107
107
|
)
|
108
108
|
|
109
|
-
self._config = config or {}
|
110
109
|
self._validate_source()
|
111
110
|
|
112
111
|
@property
|
@@ -117,12 +116,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
117
116
|
def message_repository(self) -> MessageRepository:
|
118
117
|
return self._message_repository
|
119
118
|
|
120
|
-
@property
|
121
|
-
def dynamic_streams(self) -> List[Dict[str, Any]]:
|
122
|
-
return self._dynamic_stream_configs(
|
123
|
-
manifest=self._source_config, config=self._config, with_dynamic_stream_name=True
|
124
|
-
)
|
125
|
-
|
126
119
|
@property
|
127
120
|
def connection_checker(self) -> ConnectionChecker:
|
128
121
|
check = self._source_config["check"]
|
@@ -355,16 +348,13 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
355
348
|
return stream_configs
|
356
349
|
|
357
350
|
def _dynamic_stream_configs(
|
358
|
-
self,
|
359
|
-
manifest: Mapping[str, Any],
|
360
|
-
config: Mapping[str, Any],
|
361
|
-
with_dynamic_stream_name: Optional[bool] = None,
|
351
|
+
self, manifest: Mapping[str, Any], config: Mapping[str, Any]
|
362
352
|
) -> List[Dict[str, Any]]:
|
363
353
|
dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", [])
|
364
354
|
dynamic_stream_configs: List[Dict[str, Any]] = []
|
365
355
|
seen_dynamic_streams: Set[str] = set()
|
366
356
|
|
367
|
-
for
|
357
|
+
for dynamic_definition in dynamic_stream_definitions:
|
368
358
|
components_resolver_config = dynamic_definition["components_resolver"]
|
369
359
|
|
370
360
|
if not components_resolver_config:
|
@@ -397,23 +387,12 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
397
387
|
for dynamic_stream in components_resolver.resolve_components(
|
398
388
|
stream_template_config=stream_template_config
|
399
389
|
):
|
400
|
-
dynamic_stream = {
|
401
|
-
**ManifestComponentTransformer().propagate_types_and_parameters(
|
402
|
-
"", dynamic_stream, {}, use_parent_parameters=True
|
403
|
-
)
|
404
|
-
}
|
405
|
-
|
406
390
|
if "type" not in dynamic_stream:
|
407
391
|
dynamic_stream["type"] = "DeclarativeStream"
|
408
392
|
|
409
393
|
# Ensure that each stream is created with a unique name
|
410
394
|
name = dynamic_stream.get("name")
|
411
395
|
|
412
|
-
if with_dynamic_stream_name:
|
413
|
-
dynamic_stream["dynamic_stream_name"] = dynamic_definition.get(
|
414
|
-
"name", f"dynamic_stream_{dynamic_definition_index}"
|
415
|
-
)
|
416
|
-
|
417
396
|
if not isinstance(name, str):
|
418
397
|
raise ValueError(
|
419
398
|
f"Expected stream name {name} to be a string, got {type(name)}."
|
@@ -42,15 +42,13 @@ class BearerAuthenticator(BaseModel):
|
|
42
42
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
43
43
|
|
44
44
|
|
45
|
-
class
|
46
|
-
type: Literal["
|
47
|
-
|
48
|
-
...,
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
description="Numbers of the streams to try reading from when running a check operation.",
|
53
|
-
title="Stream Count",
|
45
|
+
class CheckStream(BaseModel):
|
46
|
+
type: Literal["CheckStream"]
|
47
|
+
stream_names: List[str] = Field(
|
48
|
+
...,
|
49
|
+
description="Names of the streams to try reading from when running a check operation.",
|
50
|
+
examples=[["users"], ["users", "contacts"]],
|
51
|
+
title="Stream Names",
|
54
52
|
)
|
55
53
|
|
56
54
|
|
@@ -1525,17 +1523,6 @@ class AuthFlow(BaseModel):
|
|
1525
1523
|
oauth_config_specification: Optional[OAuthConfigSpecification] = None
|
1526
1524
|
|
1527
1525
|
|
1528
|
-
class CheckStream(BaseModel):
|
1529
|
-
type: Literal["CheckStream"]
|
1530
|
-
stream_names: Optional[List[str]] = Field(
|
1531
|
-
None,
|
1532
|
-
description="Names of the streams to try reading from when running a check operation.",
|
1533
|
-
examples=[["users"], ["users", "contacts"]],
|
1534
|
-
title="Stream Names",
|
1535
|
-
)
|
1536
|
-
dynamic_streams_check_configs: Optional[List[DynamicStreamCheckConfig]] = None
|
1537
|
-
|
1538
|
-
|
1539
1526
|
class IncrementingCountCursor(BaseModel):
|
1540
1527
|
type: Literal["IncrementingCountCursor"]
|
1541
1528
|
cursor_field: str = Field(
|
@@ -1903,10 +1890,9 @@ class DeclarativeSource1(BaseModel):
|
|
1903
1890
|
spec: Optional[Spec] = None
|
1904
1891
|
concurrency_level: Optional[ConcurrencyLevel] = None
|
1905
1892
|
api_budget: Optional[HTTPAPIBudget] = None
|
1906
|
-
max_concurrent_async_job_count: Optional[
|
1893
|
+
max_concurrent_async_job_count: Optional[int] = Field(
|
1907
1894
|
None,
|
1908
1895
|
description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
|
1909
|
-
examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
|
1910
1896
|
title="Maximum Concurrent Asynchronous Jobs",
|
1911
1897
|
)
|
1912
1898
|
metadata: Optional[Dict[str, Any]] = Field(
|
@@ -1936,10 +1922,9 @@ class DeclarativeSource2(BaseModel):
|
|
1936
1922
|
spec: Optional[Spec] = None
|
1937
1923
|
concurrency_level: Optional[ConcurrencyLevel] = None
|
1938
1924
|
api_budget: Optional[HTTPAPIBudget] = None
|
1939
|
-
max_concurrent_async_job_count: Optional[
|
1925
|
+
max_concurrent_async_job_count: Optional[int] = Field(
|
1940
1926
|
None,
|
1941
1927
|
description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
|
1942
|
-
examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
|
1943
1928
|
title="Maximum Concurrent Asynchronous Jobs",
|
1944
1929
|
)
|
1945
1930
|
metadata: Optional[Dict[str, Any]] = Field(
|
@@ -2004,6 +1989,31 @@ class SelectiveAuthenticator(BaseModel):
|
|
2004
1989
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2005
1990
|
|
2006
1991
|
|
1992
|
+
class FileUploader(BaseModel):
|
1993
|
+
type: Literal["FileUploader"]
|
1994
|
+
requester: Union[CustomRequester, HttpRequester] = Field(
|
1995
|
+
...,
|
1996
|
+
description="Requester component that describes how to prepare HTTP requests to send to the source API.",
|
1997
|
+
)
|
1998
|
+
download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
|
1999
|
+
...,
|
2000
|
+
description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
|
2001
|
+
)
|
2002
|
+
file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
|
2003
|
+
None,
|
2004
|
+
description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
|
2005
|
+
)
|
2006
|
+
filename_extractor: Optional[str] = Field(
|
2007
|
+
None,
|
2008
|
+
description="Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.",
|
2009
|
+
examples=[
|
2010
|
+
"{{ record.id }}/{{ record.file_name }}/",
|
2011
|
+
"{{ record.id }}_{{ record.file_name }}/",
|
2012
|
+
],
|
2013
|
+
)
|
2014
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2015
|
+
|
2016
|
+
|
2007
2017
|
class DeclarativeStream(BaseModel):
|
2008
2018
|
class Config:
|
2009
2019
|
extra = Extra.allow
|
@@ -2062,6 +2072,11 @@ class DeclarativeStream(BaseModel):
|
|
2062
2072
|
description="Array of state migrations to be applied on the input state",
|
2063
2073
|
title="State Migrations",
|
2064
2074
|
)
|
2075
|
+
file_uploader: Optional[FileUploader] = Field(
|
2076
|
+
None,
|
2077
|
+
description="(experimental) Describes how to fetch a file",
|
2078
|
+
title="File Uploader",
|
2079
|
+
)
|
2065
2080
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2066
2081
|
|
2067
2082
|
|
@@ -2316,15 +2331,7 @@ class SimpleRetriever(BaseModel):
|
|
2316
2331
|
CustomPartitionRouter,
|
2317
2332
|
ListPartitionRouter,
|
2318
2333
|
SubstreamPartitionRouter,
|
2319
|
-
|
2320
|
-
List[
|
2321
|
-
Union[
|
2322
|
-
CustomPartitionRouter,
|
2323
|
-
ListPartitionRouter,
|
2324
|
-
SubstreamPartitionRouter,
|
2325
|
-
GroupingPartitionRouter,
|
2326
|
-
]
|
2327
|
-
],
|
2334
|
+
List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
|
2328
2335
|
]
|
2329
2336
|
] = Field(
|
2330
2337
|
[],
|
@@ -2406,15 +2413,7 @@ class AsyncRetriever(BaseModel):
|
|
2406
2413
|
CustomPartitionRouter,
|
2407
2414
|
ListPartitionRouter,
|
2408
2415
|
SubstreamPartitionRouter,
|
2409
|
-
|
2410
|
-
List[
|
2411
|
-
Union[
|
2412
|
-
CustomPartitionRouter,
|
2413
|
-
ListPartitionRouter,
|
2414
|
-
SubstreamPartitionRouter,
|
2415
|
-
GroupingPartitionRouter,
|
2416
|
-
]
|
2417
|
-
],
|
2416
|
+
List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
|
2418
2417
|
]
|
2419
2418
|
] = Field(
|
2420
2419
|
[],
|
@@ -2466,29 +2465,6 @@ class SubstreamPartitionRouter(BaseModel):
|
|
2466
2465
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2467
2466
|
|
2468
2467
|
|
2469
|
-
class GroupingPartitionRouter(BaseModel):
|
2470
|
-
type: Literal["GroupingPartitionRouter"]
|
2471
|
-
group_size: int = Field(
|
2472
|
-
...,
|
2473
|
-
description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
|
2474
|
-
examples=[10, 50],
|
2475
|
-
title="Group Size",
|
2476
|
-
)
|
2477
|
-
underlying_partition_router: Union[
|
2478
|
-
CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
|
2479
|
-
] = Field(
|
2480
|
-
...,
|
2481
|
-
description="The partition router whose output will be grouped. This can be any valid partition router component.",
|
2482
|
-
title="Underlying Partition Router",
|
2483
|
-
)
|
2484
|
-
deduplicate: Optional[bool] = Field(
|
2485
|
-
True,
|
2486
|
-
description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
|
2487
|
-
title="Deduplicate Partitions",
|
2488
|
-
)
|
2489
|
-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2490
|
-
|
2491
|
-
|
2492
2468
|
class HttpComponentsResolver(BaseModel):
|
2493
2469
|
type: Literal["HttpComponentsResolver"]
|
2494
2470
|
retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
|
@@ -2502,9 +2478,6 @@ class HttpComponentsResolver(BaseModel):
|
|
2502
2478
|
|
2503
2479
|
class DynamicDeclarativeStream(BaseModel):
|
2504
2480
|
type: Literal["DynamicDeclarativeStream"]
|
2505
|
-
name: Optional[str] = Field(
|
2506
|
-
"", description="The dynamic stream name.", example=["Tables"], title="Name"
|
2507
|
-
)
|
2508
2481
|
stream_template: DeclarativeStream = Field(
|
2509
2482
|
..., description="Reference to the stream template.", title="Stream Template"
|
2510
2483
|
)
|
@@ -2521,6 +2494,7 @@ CompositeErrorHandler.update_forward_refs()
|
|
2521
2494
|
DeclarativeSource1.update_forward_refs()
|
2522
2495
|
DeclarativeSource2.update_forward_refs()
|
2523
2496
|
SelectiveAuthenticator.update_forward_refs()
|
2497
|
+
FileUploader.update_forward_refs()
|
2524
2498
|
DeclarativeStream.update_forward_refs()
|
2525
2499
|
SessionTokenAuthenticator.update_forward_refs()
|
2526
2500
|
DynamicSchemaLoader.update_forward_refs()
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
import copy
|
6
6
|
import typing
|
7
|
-
from typing import Any, Mapping
|
7
|
+
from typing import Any, Mapping
|
8
8
|
|
9
9
|
PARAMETERS_STR = "$parameters"
|
10
10
|
|
@@ -94,7 +94,6 @@ class ManifestComponentTransformer:
|
|
94
94
|
parent_field_identifier: str,
|
95
95
|
declarative_component: Mapping[str, Any],
|
96
96
|
parent_parameters: Mapping[str, Any],
|
97
|
-
use_parent_parameters: Optional[bool] = None,
|
98
97
|
) -> Mapping[str, Any]:
|
99
98
|
"""
|
100
99
|
Recursively transforms the specified declarative component and subcomponents to propagate parameters and insert the
|
@@ -104,7 +103,6 @@ class ManifestComponentTransformer:
|
|
104
103
|
:param declarative_component: The current component that is having type and parameters added
|
105
104
|
:param parent_field_identifier: The name of the field of the current component coming from the parent component
|
106
105
|
:param parent_parameters: The parameters set on parent components defined before the current component
|
107
|
-
:param use_parent_parameters: If set, parent parameters will be used as the source of truth when key names are the same
|
108
106
|
:return: A deep copy of the transformed component with types and parameters persisted to it
|
109
107
|
"""
|
110
108
|
propagated_component = dict(copy.deepcopy(declarative_component))
|
@@ -132,11 +130,7 @@ class ManifestComponentTransformer:
|
|
132
130
|
# level take precedence
|
133
131
|
current_parameters = dict(copy.deepcopy(parent_parameters))
|
134
132
|
component_parameters = propagated_component.pop(PARAMETERS_STR, {})
|
135
|
-
current_parameters =
|
136
|
-
{**component_parameters, **current_parameters}
|
137
|
-
if use_parent_parameters
|
138
|
-
else {**current_parameters, **component_parameters}
|
139
|
-
)
|
133
|
+
current_parameters = {**current_parameters, **component_parameters}
|
140
134
|
|
141
135
|
# Parameters should be applied to the current component fields with the existing field taking precedence over parameters if
|
142
136
|
# both exist
|
@@ -151,10 +145,7 @@ class ManifestComponentTransformer:
|
|
151
145
|
excluded_parameter = current_parameters.pop(field_name, None)
|
152
146
|
parent_type_field_identifier = f"{propagated_component.get('type')}.{field_name}"
|
153
147
|
propagated_component[field_name] = self.propagate_types_and_parameters(
|
154
|
-
parent_type_field_identifier,
|
155
|
-
field_value,
|
156
|
-
current_parameters,
|
157
|
-
use_parent_parameters=use_parent_parameters,
|
148
|
+
parent_type_field_identifier, field_value, current_parameters
|
158
149
|
)
|
159
150
|
if excluded_parameter:
|
160
151
|
current_parameters[field_name] = excluded_parameter
|
@@ -167,10 +158,7 @@ class ManifestComponentTransformer:
|
|
167
158
|
f"{propagated_component.get('type')}.{field_name}"
|
168
159
|
)
|
169
160
|
field_value[i] = self.propagate_types_and_parameters(
|
170
|
-
parent_type_field_identifier,
|
171
|
-
element,
|
172
|
-
current_parameters,
|
173
|
-
use_parent_parameters=use_parent_parameters,
|
161
|
+
parent_type_field_identifier, element, current_parameters
|
174
162
|
)
|
175
163
|
if excluded_parameter:
|
176
164
|
current_parameters[field_name] = excluded_parameter
|