airbyte-cdk 6.44.0__py3-none-any.whl → 6.45.0.dev4100__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (44)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +6 -45
  2. airbyte_cdk/connector_builder/main.py +2 -5
  3. airbyte_cdk/models/__init__.py +1 -0
  4. airbyte_cdk/models/airbyte_protocol.py +1 -3
  5. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
  6. airbyte_cdk/sources/declarative/async_job/job.py +0 -6
  7. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
  8. airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
  9. airbyte_cdk/sources/declarative/checks/__init__.py +2 -5
  10. airbyte_cdk/sources/declarative/checks/check_stream.py +11 -113
  11. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
  12. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +49 -93
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
  15. airbyte_cdk/sources/declarative/interpolation/macros.py +4 -8
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -23
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +42 -68
  18. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +4 -16
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +42 -83
  20. airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -5
  22. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  23. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
  24. airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -3
  25. airbyte_cdk/sources/file_based/file_based_stream_reader.py +9 -9
  26. airbyte_cdk/sources/file_based/file_record_data.py +24 -0
  27. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
  28. airbyte_cdk/sources/file_based/schema_helpers.py +11 -1
  29. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +0 -1
  30. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +16 -31
  31. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
  32. airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
  33. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
  34. airbyte_cdk/sources/types.py +11 -2
  35. airbyte_cdk/sources/utils/files_directory.py +15 -0
  36. airbyte_cdk/sources/utils/record_helper.py +8 -8
  37. {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/METADATA +2 -2
  38. {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/RECORD +42 -41
  39. airbyte_cdk/models/file_transfer_record_message.py +0 -13
  40. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
  41. {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/LICENSE.txt +0 -0
  42. {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/LICENSE_SHORT +0 -0
  43. {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/WHEEL +0 -0
  44. {airbyte_cdk-6.44.0.dist-info → airbyte_cdk-6.45.0.dev4100.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -47,12 +47,7 @@ properties:
   max_concurrent_async_job_count:
     title: Maximum Concurrent Asynchronous Jobs
     description: Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.
-    type:
-      - integer
-      - string
-    examples:
-      - 3
-      - "{{ config['max_concurrent_async_job_count'] }}"
+    type: integer
   metadata:
     type: object
     description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
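With the interpolated-string form and its examples removed, a manifest must now supply an integer literal for this limit. A minimal sketch of the only shape that still validates (the value 3 is an arbitrary placeholder):

    max_concurrent_async_job_count: 3
    # No longer valid after this change:
    # max_concurrent_async_job_count: "{{ config['max_concurrent_async_job_count'] }}"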
@@ -316,6 +311,7 @@ definitions:
     type: object
     required:
       - type
+      - stream_names
     properties:
       type:
         type: string
@@ -329,28 +325,6 @@ definitions:
         examples:
           - ["users"]
           - ["users", "contacts"]
-      dynamic_streams_check_configs:
-        type: array
-        items:
-          "$ref": "#/definitions/DynamicStreamCheckConfig"
-  DynamicStreamCheckConfig:
-    type: object
-    required:
-      - type
-      - dynamic_stream_name
-    properties:
-      type:
-        type: string
-        enum: [ DynamicStreamCheckConfig ]
-      dynamic_stream_name:
-        title: Dynamic Stream Name
-        description: The dynamic stream name.
-        type: string
-      stream_count:
-        title: Stream Count
-        description: The number of streams to attempt reading from during a check operation. If `stream_count` exceeds the total number of available streams, the minimum of the two values will be used.
-        type: integer
-        default: 0
   CheckDynamicStream:
     title: Dynamic Streams to Check
     description: (This component is experimental. Use at your own risk.) Defines the dynamic streams to try reading when running a check operation.
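With `stream_names` now required on `CheckStream` and the `dynamic_streams_check_configs` escape hatch removed, every manifest's `check` block has to name at least one concrete stream. A minimal sketch:

    check:
      type: CheckStream
      stream_names:
        - users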
@@ -1448,6 +1422,42 @@ definitions:
         - "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
         - "$ref": "#/definitions/CustomStateMigration"
       default: []
+    file_uploader:
+      title: File Uploader
+      description: (experimental) Describes how to fetch a file
+      type: object
+      required:
+        - type
+        - requester
+        - download_target_extractor
+      properties:
+        type:
+          type: string
+          enum: [ FileUploader ]
+        requester:
+          description: Requester component that describes how to prepare HTTP requests to send to the source API.
+          anyOf:
+            - "$ref": "#/definitions/CustomRequester"
+            - "$ref": "#/definitions/HttpRequester"
+        download_target_extractor:
+          description: Responsible for fetching the URL where the file is located. This is applied on each record and not on the HTTP response.
+          anyOf:
+            - "$ref": "#/definitions/CustomRecordExtractor"
+            - "$ref": "#/definitions/DpathExtractor"
+        file_extractor:
+          description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content.
+          anyOf:
+            - "$ref": "#/definitions/CustomRecordExtractor"
+            - "$ref": "#/definitions/DpathExtractor"
+        filename_extractor:
+          description: Defines the name under which to store the file. The stream name is automatically added to the file path. A unique file ID can be used to avoid overwriting files; a random UUID is used if the extractor is not provided.
+          type: string
+          interpolation_context:
+            - config
+            - record
+          examples:
+            - "{{ record.id }}/{{ record.file_name }}/"
+            - "{{ record.id }}_{{ record.file_name }}/"
     $parameters:
       type: object
       additional_properties: true
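A hedged sketch of how the new component could sit on a stream; the endpoint, field names, and extractor path below are illustrative placeholders, not taken from the diff:

    streams:
      - type: DeclarativeStream
        name: attachments
        # retriever: ... SimpleRetriever that lists attachment records ...
        file_uploader:
          type: FileUploader
          requester:
            type: HttpRequester
            url_base: "https://api.example.com"   # placeholder base URL
          download_target_extractor:
            type: DpathExtractor
            field_path: ["download_url"]          # record field holding the file URL
          filename_extractor: "{{ record.id }}_{{ record.file_name }}/"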
@@ -2218,8 +2228,7 @@ definitions:
     type: object
     additionalProperties: true
   JsonDecoder:
-    title: JSON
-    description: Select 'JSON' if the response is formatted as a JSON object.
+    title: Json Decoder
     type: object
     required:
       - type
@@ -2228,8 +2237,8 @@ definitions:
         type: string
         enum: [JsonDecoder]
   JsonlDecoder:
-    title: JSON Lines
-    description: Select 'JSON Lines' if the response consists of JSON objects separated by new lines ('\n') in JSONL format.
+    title: JSONL Decoder
+    description: Use this if the response consists of JSON objects separated by new lines (`\n`) in JSONL format.
     type: object
     required:
       - type
@@ -2354,8 +2363,8 @@ definitions:
     type: object
     additionalProperties: true
   IterableDecoder:
-    title: Iterable
-    description: Select 'Iterable' if the response consists of strings separated by new lines (`\n`). The string will then be wrapped into a JSON object with the `record` key.
+    title: Iterable Decoder
+    description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
     type: object
     required:
       - type
@@ -2364,8 +2373,8 @@ definitions:
         type: string
         enum: [IterableDecoder]
   XmlDecoder:
-    title: XML
-    description: Select 'XML' if the response consists of XML-formatted data.
+    title: XML Decoder
+    description: Use this if the response is XML.
     type: object
     required:
       - type
@@ -2396,8 +2405,8 @@ definitions:
     type: object
     additionalProperties: true
   ZipfileDecoder:
-    title: ZIP File
-    description: Select 'ZIP file' for response data that is returned as a zipfile. Requires specifying an inner data type/decoder to parse the unzipped data.
+    title: Zipfile Decoder
+    description: Decoder for response data that is returned as zipfile(s).
     type: object
     additionalProperties: true
     required:
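ZipfileDecoder (like GzipDecoder further down) wraps an inner decoder for the decompressed payload. Assuming the inner field is named `decoder`, as the nested JsonDecoder/JsonlDecoder refs elsewhere in this schema suggest, a configuration sketch looks like:

    decoder:
      type: ZipfileDecoder
      decoder:
        type: JsonDecoder   # inner decoder for the unzipped data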
@@ -2921,7 +2930,7 @@ definitions:
       title: Lazy Read Pointer
       description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
       type: array
-      default: []
+      default: [ ]
       items:
         - type: string
       interpolation_context:
@@ -3226,7 +3235,7 @@ definitions:
     properties:
       type:
         type: string
-        enum: [StateDelegatingStream]
+        enum: [ StateDelegatingStream ]
       name:
         title: Name
         description: The stream name.
@@ -3281,14 +3290,12 @@ definitions:
          - "$ref": "#/definitions/CustomPartitionRouter"
          - "$ref": "#/definitions/ListPartitionRouter"
          - "$ref": "#/definitions/SubstreamPartitionRouter"
-          - "$ref": "#/definitions/GroupingPartitionRouter"
          - type: array
            items:
              anyOf:
                - "$ref": "#/definitions/CustomPartitionRouter"
                - "$ref": "#/definitions/ListPartitionRouter"
                - "$ref": "#/definitions/SubstreamPartitionRouter"
-                - "$ref": "#/definitions/GroupingPartitionRouter"
      decoder:
        title: Decoder
        description: Component decoding the response so records can be extracted.
@@ -3305,8 +3312,6 @@ definitions:
     type: object
     additionalProperties: true
   GzipDecoder:
-    title: gzip
-    description: Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.
     type: object
     required:
       - type
@@ -3322,8 +3327,6 @@ definitions:
          - "$ref": "#/definitions/JsonDecoder"
          - "$ref": "#/definitions/JsonlDecoder"
   CsvDecoder:
-    title: CSV
-    description: "Select 'CSV' for response data that is formatted as CSV (comma-separated values). Can specify an encoding (default: 'utf-8') and a delimiter (default: ',')."
     type: object
     required:
       - type
@@ -3454,14 +3457,12 @@ definitions:
          - "$ref": "#/definitions/CustomPartitionRouter"
          - "$ref": "#/definitions/ListPartitionRouter"
          - "$ref": "#/definitions/SubstreamPartitionRouter"
-          - "$ref": "#/definitions/GroupingPartitionRouter"
          - type: array
            items:
              anyOf:
                - "$ref": "#/definitions/CustomPartitionRouter"
                - "$ref": "#/definitions/ListPartitionRouter"
                - "$ref": "#/definitions/SubstreamPartitionRouter"
-                - "$ref": "#/definitions/GroupingPartitionRouter"
      decoder:
        title: Decoder
        description: Component decoding the response so records can be extracted.
@@ -3578,44 +3579,6 @@ definitions:
     $parameters:
       type: object
       additionalProperties: true
-  GroupingPartitionRouter:
-    title: Grouping Partition Router
-    description: >
-      A decorator on top of a partition router that groups partitions into batches of a specified size.
-      This is useful for APIs that support filtering by multiple partition keys in a single request.
-      Note that per-partition incremental syncs may not work as expected because the grouping
-      of partitions might change between syncs, potentially leading to inconsistent state tracking.
-    type: object
-    required:
-      - type
-      - group_size
-      - underlying_partition_router
-    properties:
-      type:
-        type: string
-        enum: [GroupingPartitionRouter]
-      group_size:
-        title: Group Size
-        description: The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.
-        type: integer
-        examples:
-          - 10
-          - 50
-      underlying_partition_router:
-        title: Underlying Partition Router
-        description: The partition router whose output will be grouped. This can be any valid partition router component.
-        anyOf:
-          - "$ref": "#/definitions/CustomPartitionRouter"
-          - "$ref": "#/definitions/ListPartitionRouter"
-          - "$ref": "#/definitions/SubstreamPartitionRouter"
-      deduplicate:
-        title: Deduplicate Partitions
-        description: If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.
-        type: boolean
-        default: true
-      $parameters:
-        type: object
-        additionalProperties: true
   WaitUntilTimeFromHeader:
     title: Wait Until Time Defined In Response Header
     description: Extract time at which we can retry the request from response header and wait for the difference between now and that time.
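Manifests still using the removed component will now fail schema validation. For reference, the shape that is no longer accepted looked roughly like this (values are illustrative):

    partition_router:
      type: GroupingPartitionRouter   # removed in this version
      group_size: 10
      underlying_partition_router:
        type: ListPartitionRouter
        cursor_field: account_id
        values: "{{ config['account_ids'] }}"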
@@ -3787,13 +3750,6 @@ definitions:
       type:
         type: string
         enum: [DynamicDeclarativeStream]
-      name:
-        title: Name
-        description: The dynamic stream name.
-        type: string
-        default: ""
-        example:
-          - "Tables"
      stream_template:
        title: Stream Template
        description: Reference to the stream template.
airbyte_cdk/sources/declarative/extractors/record_selector.py
@@ -15,6 +15,7 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
 )
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.models import SchemaNormalization
+from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
 from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
 from airbyte_cdk.sources.utils.transform import TypeTransformer
@@ -42,6 +43,7 @@ class RecordSelector(HttpSelector):
     record_filter: Optional[RecordFilter] = None
     transformations: List[RecordTransformation] = field(default_factory=lambda: [])
     transform_before_filtering: bool = False
+    file_uploader: Optional[FileUploader] = None

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
@@ -117,7 +119,10 @@ class RecordSelector(HttpSelector):
             transformed_filtered_data, schema=records_schema
         )
         for data in normalized_data:
-            yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
+            record = Record(data=data, stream_name=self.name, associated_slice=stream_slice)
+            if self.file_uploader:
+                self.file_uploader.upload(record)
+            yield record

     def _normalize_by_schema(
         self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py
@@ -79,7 +79,6 @@ class ConcurrentPerPartitionCursor(Cursor):
         connector_state_manager: ConnectorStateManager,
         connector_state_converter: AbstractStreamStateConverter,
         cursor_field: CursorField,
-        use_global_cursor: bool = False,
     ) -> None:
         self._global_cursor: Optional[StreamState] = {}
         self._stream_name = stream_name
@@ -107,7 +106,7 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
         self._number_of_partitions: int = 0
-        self._use_global_cursor: bool = use_global_cursor
+        self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
         # Track the last time a state message was emitted
         self._last_emission_time: float = 0.0
airbyte_cdk/sources/declarative/interpolation/macros.py
@@ -156,7 +156,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:


 def format_datetime(
-    dt: Union[str, datetime.datetime, int], format: str, input_format: Optional[str] = None
+    dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None
 ) -> str:
     """
     Converts datetime to another format
@@ -170,13 +170,9 @@ def format_datetime(
     """
     if isinstance(dt, datetime.datetime):
         return dt.strftime(format)
-
-    if isinstance(dt, int):
-        dt_datetime = DatetimeParser().parse(dt, input_format if input_format else "%s")
-    else:
-        dt_datetime = (
-            datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
-        )
+    dt_datetime = (
+        datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
+    )
     return DatetimeParser().format(dt=dt_datetime, format=format)
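The macro now accepts only strings and datetime objects, so epoch-seconds inputs have to be converted before interpolation. A hedged sketch of what still works versus what was dropped (field names are placeholders):

    # Still supported: string input with an optional input_format.
    start_time: "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%SZ') }}"
    # Dropped: integer epoch input, previously parsed via the '%s' format, e.g.
    # "{{ format_datetime(1704067200, '%Y-%m-%d', '%s') }}"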
 
airbyte_cdk/sources/declarative/manifest_declarative_source.py
@@ -106,7 +106,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
             AlwaysLogSliceLogger() if emit_connector_builder_messages else DebugSliceLogger()
         )

-        self._config = config or {}
         self._validate_source()

     @property
@@ -117,12 +116,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
     def message_repository(self) -> MessageRepository:
         return self._message_repository

-    @property
-    def dynamic_streams(self) -> List[Dict[str, Any]]:
-        return self._dynamic_stream_configs(
-            manifest=self._source_config, config=self._config, with_dynamic_stream_name=True
-        )
-
     @property
     def connection_checker(self) -> ConnectionChecker:
         check = self._source_config["check"]
@@ -355,16 +348,13 @@ class ManifestDeclarativeSource(DeclarativeSource):
         return stream_configs

     def _dynamic_stream_configs(
-        self,
-        manifest: Mapping[str, Any],
-        config: Mapping[str, Any],
-        with_dynamic_stream_name: Optional[bool] = None,
+        self, manifest: Mapping[str, Any], config: Mapping[str, Any]
     ) -> List[Dict[str, Any]]:
         dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", [])
         dynamic_stream_configs: List[Dict[str, Any]] = []
         seen_dynamic_streams: Set[str] = set()

-        for dynamic_definition_index, dynamic_definition in enumerate(dynamic_stream_definitions):
+        for dynamic_definition in dynamic_stream_definitions:
             components_resolver_config = dynamic_definition["components_resolver"]

             if not components_resolver_config:
@@ -397,23 +387,12 @@ class ManifestDeclarativeSource(DeclarativeSource):
             for dynamic_stream in components_resolver.resolve_components(
                 stream_template_config=stream_template_config
             ):
-                dynamic_stream = {
-                    **ManifestComponentTransformer().propagate_types_and_parameters(
-                        "", dynamic_stream, {}, use_parent_parameters=True
-                    )
-                }
-
                 if "type" not in dynamic_stream:
                     dynamic_stream["type"] = "DeclarativeStream"

                 # Ensure that each stream is created with a unique name
                 name = dynamic_stream.get("name")

-                if with_dynamic_stream_name:
-                    dynamic_stream["dynamic_stream_name"] = dynamic_definition.get(
-                        "name", f"dynamic_stream_{dynamic_definition_index}"
-                    )
-
                 if not isinstance(name, str):
                     raise ValueError(
                         f"Expected stream name {name} to be a string, got {type(name)}."
airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -42,15 +42,13 @@ class BearerAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class DynamicStreamCheckConfig(BaseModel):
-    type: Literal["DynamicStreamCheckConfig"]
-    dynamic_stream_name: str = Field(
-        ..., description="The dynamic stream name.", title="Dynamic Stream Name"
-    )
-    stream_count: Optional[int] = Field(
-        0,
-        description="Numbers of the streams to try reading from when running a check operation.",
-        title="Stream Count",
+class CheckStream(BaseModel):
+    type: Literal["CheckStream"]
+    stream_names: List[str] = Field(
+        ...,
+        description="Names of the streams to try reading from when running a check operation.",
+        examples=[["users"], ["users", "contacts"]],
+        title="Stream Names",
     )

@@ -1525,17 +1523,6 @@ class AuthFlow(BaseModel):
     oauth_config_specification: Optional[OAuthConfigSpecification] = None


-class CheckStream(BaseModel):
-    type: Literal["CheckStream"]
-    stream_names: Optional[List[str]] = Field(
-        None,
-        description="Names of the streams to try reading from when running a check operation.",
-        examples=[["users"], ["users", "contacts"]],
-        title="Stream Names",
-    )
-    dynamic_streams_check_configs: Optional[List[DynamicStreamCheckConfig]] = None
-
-
 class IncrementingCountCursor(BaseModel):
     type: Literal["IncrementingCountCursor"]
     cursor_field: str = Field(
@@ -1903,10 +1890,9 @@ class DeclarativeSource1(BaseModel):
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
     api_budget: Optional[HTTPAPIBudget] = None
-    max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
+    max_concurrent_async_job_count: Optional[int] = Field(
         None,
         description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
-        examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
         title="Maximum Concurrent Asynchronous Jobs",
     )
     metadata: Optional[Dict[str, Any]] = Field(
@@ -1936,10 +1922,9 @@ class DeclarativeSource2(BaseModel):
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
     api_budget: Optional[HTTPAPIBudget] = None
-    max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
+    max_concurrent_async_job_count: Optional[int] = Field(
         None,
         description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
-        examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
         title="Maximum Concurrent Asynchronous Jobs",
     )
     metadata: Optional[Dict[str, Any]] = Field(
@@ -2004,6 +1989,31 @@ class SelectiveAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+class FileUploader(BaseModel):
+    type: Literal["FileUploader"]
+    requester: Union[CustomRequester, HttpRequester] = Field(
+        ...,
+        description="Requester component that describes how to prepare HTTP requests to send to the source API.",
+    )
+    download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
+        ...,
+        description="Responsible for fetching the URL where the file is located. This is applied on each record and not on the HTTP response.",
+    )
+    file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
+        None,
+        description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content.",
+    )
+    filename_extractor: Optional[str] = Field(
+        None,
+        description="Defines the name under which to store the file. The stream name is automatically added to the file path. A unique file ID can be used to avoid overwriting files; a random UUID is used if the extractor is not provided.",
+        examples=[
+            "{{ record.id }}/{{ record.file_name }}/",
+            "{{ record.id }}_{{ record.file_name }}/",
+        ],
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class DeclarativeStream(BaseModel):
     class Config:
         extra = Extra.allow
@@ -2062,6 +2072,11 @@ class DeclarativeStream(BaseModel):
         description="Array of state migrations to be applied on the input state",
         title="State Migrations",
     )
+    file_uploader: Optional[FileUploader] = Field(
+        None,
+        description="(experimental) Describes how to fetch a file",
+        title="File Uploader",
+    )
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")

@@ -2316,15 +2331,7 @@ class SimpleRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            GroupingPartitionRouter,
-            List[
-                Union[
-                    CustomPartitionRouter,
-                    ListPartitionRouter,
-                    SubstreamPartitionRouter,
-                    GroupingPartitionRouter,
-                ]
-            ],
+            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
         ]
     ] = Field(
         [],
2406
2413
  CustomPartitionRouter,
2407
2414
  ListPartitionRouter,
2408
2415
  SubstreamPartitionRouter,
2409
- GroupingPartitionRouter,
2410
- List[
2411
- Union[
2412
- CustomPartitionRouter,
2413
- ListPartitionRouter,
2414
- SubstreamPartitionRouter,
2415
- GroupingPartitionRouter,
2416
- ]
2417
- ],
2416
+ List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2418
2417
  ]
2419
2418
  ] = Field(
2420
2419
  [],
@@ -2466,29 +2465,6 @@ class SubstreamPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class GroupingPartitionRouter(BaseModel):
-    type: Literal["GroupingPartitionRouter"]
-    group_size: int = Field(
-        ...,
-        description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
-        examples=[10, 50],
-        title="Group Size",
-    )
-    underlying_partition_router: Union[
-        CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
-    ] = Field(
-        ...,
-        description="The partition router whose output will be grouped. This can be any valid partition router component.",
-        title="Underlying Partition Router",
-    )
-    deduplicate: Optional[bool] = Field(
-        True,
-        description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
-        title="Deduplicate Partitions",
-    )
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
 class HttpComponentsResolver(BaseModel):
     type: Literal["HttpComponentsResolver"]
     retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
@@ -2502,9 +2478,6 @@ class HttpComponentsResolver(BaseModel):

 class DynamicDeclarativeStream(BaseModel):
     type: Literal["DynamicDeclarativeStream"]
-    name: Optional[str] = Field(
-        "", description="The dynamic stream name.", example=["Tables"], title="Name"
-    )
     stream_template: DeclarativeStream = Field(
         ..., description="Reference to the stream template.", title="Stream Template"
     )
2521
2494
  DeclarativeSource1.update_forward_refs()
2522
2495
  DeclarativeSource2.update_forward_refs()
2523
2496
  SelectiveAuthenticator.update_forward_refs()
2497
+ FileUploader.update_forward_refs()
2524
2498
  DeclarativeStream.update_forward_refs()
2525
2499
  SessionTokenAuthenticator.update_forward_refs()
2526
2500
  DynamicSchemaLoader.update_forward_refs()
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py
@@ -4,7 +4,7 @@

 import copy
 import typing
-from typing import Any, Mapping, Optional
+from typing import Any, Mapping

 PARAMETERS_STR = "$parameters"

@@ -94,7 +94,6 @@ class ManifestComponentTransformer:
         parent_field_identifier: str,
         declarative_component: Mapping[str, Any],
         parent_parameters: Mapping[str, Any],
-        use_parent_parameters: Optional[bool] = None,
     ) -> Mapping[str, Any]:
         """
         Recursively transforms the specified declarative component and subcomponents to propagate parameters and insert the
@@ -104,7 +103,6 @@ class ManifestComponentTransformer:
         :param declarative_component: The current component that is having type and parameters added
         :param parent_field_identifier: The name of the field of the current component coming from the parent component
         :param parent_parameters: The parameters set on parent components defined before the current component
-        :param use_parent_parameters: If set, parent parameters will be used as the source of truth when key names are the same
         :return: A deep copy of the transformed component with types and parameters persisted to it
         """
         propagated_component = dict(copy.deepcopy(declarative_component))
@@ -132,11 +130,7 @@ class ManifestComponentTransformer:
         # level take precedence
         current_parameters = dict(copy.deepcopy(parent_parameters))
         component_parameters = propagated_component.pop(PARAMETERS_STR, {})
-        current_parameters = (
-            {**component_parameters, **current_parameters}
-            if use_parent_parameters
-            else {**current_parameters, **component_parameters}
-        )
+        current_parameters = {**current_parameters, **component_parameters}

         # Parameters should be applied to the current component fields with the existing field taking precedence over parameters if
         # both exist
@@ -151,10 +145,7 @@ class ManifestComponentTransformer:
                 excluded_parameter = current_parameters.pop(field_name, None)
                 parent_type_field_identifier = f"{propagated_component.get('type')}.{field_name}"
                 propagated_component[field_name] = self.propagate_types_and_parameters(
-                    parent_type_field_identifier,
-                    field_value,
-                    current_parameters,
-                    use_parent_parameters=use_parent_parameters,
+                    parent_type_field_identifier, field_value, current_parameters
                 )
                 if excluded_parameter:
                     current_parameters[field_name] = excluded_parameter
@@ -167,10 +158,7 @@ class ManifestComponentTransformer:
                         f"{propagated_component.get('type')}.{field_name}"
                     )
                     field_value[i] = self.propagate_types_and_parameters(
-                        parent_type_field_identifier,
-                        element,
-                        current_parameters,
-                        use_parent_parameters=use_parent_parameters,
+                        parent_type_field_identifier, element, current_parameters
                     )
                     if excluded_parameter:
                         current_parameters[field_name] = excluded_parameter
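With `use_parent_parameters` removed, parameter merging follows a single rule again: a component's own `$parameters` override anything inherited from its parent. An illustrative manifest sketch (names are placeholders):

    streams:
      - type: DeclarativeStream
        $parameters:
          name: users            # inherited by subcomponents
        retriever:
          type: SimpleRetriever
          $parameters:
            name: users_v2       # the component-level value always wins now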