airbyte-cdk 6.45.0__py3-none-any.whl → 6.45.0.dev4101__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +6 -45
  2. airbyte_cdk/connector_builder/main.py +2 -5
  3. airbyte_cdk/models/__init__.py +1 -0
  4. airbyte_cdk/models/airbyte_protocol.py +1 -3
  5. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
  6. airbyte_cdk/sources/declarative/async_job/job.py +0 -6
  7. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
  8. airbyte_cdk/sources/declarative/async_job/job_tracker.py +6 -22
  9. airbyte_cdk/sources/declarative/checks/__init__.py +2 -5
  10. airbyte_cdk/sources/declarative/checks/check_stream.py +11 -113
  11. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
  12. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +50 -210
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +1 -2
  15. airbyte_cdk/sources/declarative/interpolation/macros.py +4 -8
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -23
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +43 -142
  18. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +4 -16
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +50 -263
  20. airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -5
  22. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -25
  23. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  24. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +30 -101
  25. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
  26. airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -3
  27. airbyte_cdk/sources/file_based/file_based_stream_reader.py +32 -14
  28. airbyte_cdk/sources/file_based/file_record_data.py +24 -0
  29. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
  30. airbyte_cdk/sources/file_based/schema_helpers.py +11 -1
  31. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +0 -1
  32. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +16 -31
  33. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
  34. airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
  35. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +0 -4
  36. airbyte_cdk/sources/types.py +11 -2
  37. airbyte_cdk/sources/utils/files_directory.py +15 -0
  38. airbyte_cdk/sources/utils/record_helper.py +8 -8
  39. {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/METADATA +2 -2
  40. {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/RECORD +44 -50
  41. airbyte_cdk/models/file_transfer_record_message.py +0 -13
  42. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -150
  43. airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +0 -13
  44. airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +0 -40
  45. airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +0 -69
  46. airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +0 -58
  47. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +0 -10
  48. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +0 -33
  49. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +0 -19
  50. {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/LICENSE_SHORT +0 -0
  52. {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/WHEEL +0 -0
  53. {airbyte_cdk-6.45.0.dist-info → airbyte_cdk-6.45.0.dev4101.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,3 @@
1
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
-
3
1
  # generated by datamodel-codegen:
4
2
  # filename: declarative_component_schema.yaml
5
3
 
@@ -44,15 +42,13 @@ class BearerAuthenticator(BaseModel):
44
42
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
45
43
 
46
44
 
47
- class DynamicStreamCheckConfig(BaseModel):
48
- type: Literal["DynamicStreamCheckConfig"]
49
- dynamic_stream_name: str = Field(
50
- ..., description="The dynamic stream name.", title="Dynamic Stream Name"
51
- )
52
- stream_count: Optional[int] = Field(
53
- 0,
54
- description="The number of streams to attempt reading from during a check operation. If `stream_count` exceeds the total number of available streams, the minimum of the two values will be used.",
55
- title="Stream Count",
45
+ class CheckStream(BaseModel):
46
+ type: Literal["CheckStream"]
47
+ stream_names: List[str] = Field(
48
+ ...,
49
+ description="Names of the streams to try reading from when running a check operation.",
50
+ examples=[["users"], ["users", "contacts"]],
51
+ title="Stream Names",
56
52
  )
57
53
 
58
54
 
@@ -720,17 +716,6 @@ class ExponentialBackoffStrategy(BaseModel):
720
716
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
721
717
 
722
718
 
723
- class GroupByKeyMergeStrategy(BaseModel):
724
- type: Literal["GroupByKeyMergeStrategy"]
725
- key: Union[str, List[str]] = Field(
726
- ...,
727
- description="The name of the field on the record whose value will be used to group properties that were retrieved through multiple API requests.",
728
- examples=["id", ["parent_id", "end_date"]],
729
- title="Key",
730
- )
731
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
732
-
733
-
734
719
  class SessionTokenRequestBearerAuthenticator(BaseModel):
735
720
  type: Literal["Bearer"]
736
721
 
@@ -1202,31 +1187,6 @@ class PrimaryKey(BaseModel):
1202
1187
  )
1203
1188
 
1204
1189
 
1205
- class PropertyLimitType(Enum):
1206
- characters = "characters"
1207
- property_count = "property_count"
1208
-
1209
-
1210
- class PropertyChunking(BaseModel):
1211
- type: Literal["PropertyChunking"]
1212
- property_limit_type: PropertyLimitType = Field(
1213
- ...,
1214
- description="The type used to determine the maximum number of properties per chunk",
1215
- title="Property Limit Type",
1216
- )
1217
- property_limit: Optional[int] = Field(
1218
- None,
1219
- description="The maximum amount of properties that can be retrieved per request according to the limit type.",
1220
- title="Property Limit",
1221
- )
1222
- record_merge_strategy: Optional[GroupByKeyMergeStrategy] = Field(
1223
- None,
1224
- description="Dictates how to records that require multiple requests to get all properties should be emitted to the destination",
1225
- title="Record Merge Strategy",
1226
- )
1227
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1228
-
1229
-
1230
1190
  class RecordFilter(BaseModel):
1231
1191
  type: Literal["RecordFilter"]
1232
1192
  condition: Optional[str] = Field(
@@ -1563,17 +1523,6 @@ class AuthFlow(BaseModel):
1563
1523
  oauth_config_specification: Optional[OAuthConfigSpecification] = None
1564
1524
 
1565
1525
 
1566
- class CheckStream(BaseModel):
1567
- type: Literal["CheckStream"]
1568
- stream_names: Optional[List[str]] = Field(
1569
- None,
1570
- description="Names of the streams to try reading from when running a check operation.",
1571
- examples=[["users"], ["users", "contacts"]],
1572
- title="Stream Names",
1573
- )
1574
- dynamic_streams_check_configs: Optional[List[DynamicStreamCheckConfig]] = None
1575
-
1576
-
1577
1526
  class IncrementingCountCursor(BaseModel):
1578
1527
  type: Literal["IncrementingCountCursor"]
1579
1528
  cursor_field: str = Field(
@@ -1941,10 +1890,9 @@ class DeclarativeSource1(BaseModel):
1941
1890
  spec: Optional[Spec] = None
1942
1891
  concurrency_level: Optional[ConcurrencyLevel] = None
1943
1892
  api_budget: Optional[HTTPAPIBudget] = None
1944
- max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
1893
+ max_concurrent_async_job_count: Optional[int] = Field(
1945
1894
  None,
1946
1895
  description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
1947
- examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
1948
1896
  title="Maximum Concurrent Asynchronous Jobs",
1949
1897
  )
1950
1898
  metadata: Optional[Dict[str, Any]] = Field(
@@ -1974,10 +1922,9 @@ class DeclarativeSource2(BaseModel):
1974
1922
  spec: Optional[Spec] = None
1975
1923
  concurrency_level: Optional[ConcurrencyLevel] = None
1976
1924
  api_budget: Optional[HTTPAPIBudget] = None
1977
- max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
1925
+ max_concurrent_async_job_count: Optional[int] = Field(
1978
1926
  None,
1979
1927
  description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
1980
- examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
1981
1928
  title="Maximum Concurrent Asynchronous Jobs",
1982
1929
  )
1983
1930
  metadata: Optional[Dict[str, Any]] = Field(
@@ -2042,6 +1989,31 @@ class SelectiveAuthenticator(BaseModel):
2042
1989
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2043
1990
 
2044
1991
 
1992
+ class FileUploader(BaseModel):
1993
+ type: Literal["FileUploader"]
1994
+ requester: Union[CustomRequester, HttpRequester] = Field(
1995
+ ...,
1996
+ description="Requester component that describes how to prepare HTTP requests to send to the source API.",
1997
+ )
1998
+ download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
1999
+ ...,
2000
+ description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
2001
+ )
2002
+ file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
2003
+ None,
2004
+ description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
2005
+ )
2006
+ filename_extractor: Optional[str] = Field(
2007
+ None,
2008
+ description="Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.",
2009
+ examples=[
2010
+ "{{ record.id }}/{{ record.file_name }}/",
2011
+ "{{ record.id }}_{{ record.file_name }}/",
2012
+ ],
2013
+ )
2014
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2015
+
2016
+
2045
2017
  class DeclarativeStream(BaseModel):
2046
2018
  class Config:
2047
2019
  extra = Extra.allow
@@ -2100,6 +2072,11 @@ class DeclarativeStream(BaseModel):
2100
2072
  description="Array of state migrations to be applied on the input state",
2101
2073
  title="State Migrations",
2102
2074
  )
2075
+ file_uploader: Optional[FileUploader] = Field(
2076
+ None,
2077
+ description="(experimental) Describes how to fetch a file",
2078
+ title="File Uploader",
2079
+ )
2103
2080
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2104
2081
 
2105
2082
 
@@ -2225,7 +2202,7 @@ class HttpRequester(BaseModel):
2225
2202
  examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}],
2226
2203
  title="Request Headers",
2227
2204
  )
2228
- request_parameters: Optional[Union[str, Dict[str, Union[str, Any]]]] = Field(
2205
+ request_parameters: Optional[Union[str, Dict[str, str]]] = Field(
2229
2206
  None,
2230
2207
  description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.",
2231
2208
  examples=[
@@ -2315,40 +2292,6 @@ class ParentStreamConfig(BaseModel):
2315
2292
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2316
2293
 
2317
2294
 
2318
- class PropertiesFromEndpoint(BaseModel):
2319
- type: Literal["PropertiesFromEndpoint"]
2320
- property_field_path: List[str] = Field(
2321
- ...,
2322
- description="Describes the path to the field that should be extracted",
2323
- examples=[["name"]],
2324
- )
2325
- retriever: Union[CustomRetriever, SimpleRetriever] = Field(
2326
- ...,
2327
- description="Requester component that describes how to fetch the properties to query from a remote API endpoint.",
2328
- )
2329
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2330
-
2331
-
2332
- class QueryProperties(BaseModel):
2333
- type: Literal["QueryProperties"]
2334
- property_list: Union[List[str], PropertiesFromEndpoint] = Field(
2335
- ...,
2336
- description="The set of properties that will be queried for in the outbound request. This can either be statically defined or dynamic based on an API endpoint",
2337
- title="Property List",
2338
- )
2339
- always_include_properties: Optional[List[str]] = Field(
2340
- None,
2341
- description="The list of properties that should be included in every set of properties when multiple chunks of properties are being requested.",
2342
- title="Always Include Properties",
2343
- )
2344
- property_chunking: Optional[PropertyChunking] = Field(
2345
- None,
2346
- description="Defines how query properties will be grouped into smaller sets for APIs with limitations on the number of properties fetched per API request.",
2347
- title="Property Chunking",
2348
- )
2349
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2350
-
2351
-
2352
2295
  class StateDelegatingStream(BaseModel):
2353
2296
  type: Literal["StateDelegatingStream"]
2354
2297
  name: str = Field(..., description="The stream name.", example=["Users"], title="Name")
@@ -2388,15 +2331,7 @@ class SimpleRetriever(BaseModel):
2388
2331
  CustomPartitionRouter,
2389
2332
  ListPartitionRouter,
2390
2333
  SubstreamPartitionRouter,
2391
- GroupingPartitionRouter,
2392
- List[
2393
- Union[
2394
- CustomPartitionRouter,
2395
- ListPartitionRouter,
2396
- SubstreamPartitionRouter,
2397
- GroupingPartitionRouter,
2398
- ]
2399
- ],
2334
+ List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2400
2335
  ]
2401
2336
  ] = Field(
2402
2337
  [],
@@ -2478,15 +2413,7 @@ class AsyncRetriever(BaseModel):
2478
2413
  CustomPartitionRouter,
2479
2414
  ListPartitionRouter,
2480
2415
  SubstreamPartitionRouter,
2481
- GroupingPartitionRouter,
2482
- List[
2483
- Union[
2484
- CustomPartitionRouter,
2485
- ListPartitionRouter,
2486
- SubstreamPartitionRouter,
2487
- GroupingPartitionRouter,
2488
- ]
2489
- ],
2416
+ List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
2490
2417
  ]
2491
2418
  ] = Field(
2492
2419
  [],
@@ -2538,29 +2465,6 @@ class SubstreamPartitionRouter(BaseModel):
2538
2465
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2539
2466
 
2540
2467
 
2541
- class GroupingPartitionRouter(BaseModel):
2542
- type: Literal["GroupingPartitionRouter"]
2543
- group_size: int = Field(
2544
- ...,
2545
- description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
2546
- examples=[10, 50],
2547
- title="Group Size",
2548
- )
2549
- underlying_partition_router: Union[
2550
- CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
2551
- ] = Field(
2552
- ...,
2553
- description="The partition router whose output will be grouped. This can be any valid partition router component.",
2554
- title="Underlying Partition Router",
2555
- )
2556
- deduplicate: Optional[bool] = Field(
2557
- True,
2558
- description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
2559
- title="Deduplicate Partitions",
2560
- )
2561
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2562
-
2563
-
2564
2468
  class HttpComponentsResolver(BaseModel):
2565
2469
  type: Literal["HttpComponentsResolver"]
2566
2470
  retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
@@ -2574,9 +2478,6 @@ class HttpComponentsResolver(BaseModel):
2574
2478
 
2575
2479
  class DynamicDeclarativeStream(BaseModel):
2576
2480
  type: Literal["DynamicDeclarativeStream"]
2577
- name: Optional[str] = Field(
2578
- "", description="The dynamic stream name.", example=["Tables"], title="Name"
2579
- )
2580
2481
  stream_template: DeclarativeStream = Field(
2581
2482
  ..., description="Reference to the stream template.", title="Stream Template"
2582
2483
  )
@@ -2593,10 +2494,10 @@ CompositeErrorHandler.update_forward_refs()
2593
2494
  DeclarativeSource1.update_forward_refs()
2594
2495
  DeclarativeSource2.update_forward_refs()
2595
2496
  SelectiveAuthenticator.update_forward_refs()
2497
+ FileUploader.update_forward_refs()
2596
2498
  DeclarativeStream.update_forward_refs()
2597
2499
  SessionTokenAuthenticator.update_forward_refs()
2598
2500
  DynamicSchemaLoader.update_forward_refs()
2599
2501
  ParentStreamConfig.update_forward_refs()
2600
- PropertiesFromEndpoint.update_forward_refs()
2601
2502
  SimpleRetriever.update_forward_refs()
2602
2503
  AsyncRetriever.update_forward_refs()
@@ -4,7 +4,7 @@
4
4
 
5
5
  import copy
6
6
  import typing
7
- from typing import Any, Mapping, Optional
7
+ from typing import Any, Mapping
8
8
 
9
9
  PARAMETERS_STR = "$parameters"
10
10
 
@@ -94,7 +94,6 @@ class ManifestComponentTransformer:
94
94
  parent_field_identifier: str,
95
95
  declarative_component: Mapping[str, Any],
96
96
  parent_parameters: Mapping[str, Any],
97
- use_parent_parameters: Optional[bool] = None,
98
97
  ) -> Mapping[str, Any]:
99
98
  """
100
99
  Recursively transforms the specified declarative component and subcomponents to propagate parameters and insert the
@@ -104,7 +103,6 @@ class ManifestComponentTransformer:
104
103
  :param declarative_component: The current component that is having type and parameters added
105
104
  :param parent_field_identifier: The name of the field of the current component coming from the parent component
106
105
  :param parent_parameters: The parameters set on parent components defined before the current component
107
- :param use_parent_parameters: If set, parent parameters will be used as the source of truth when key names are the same
108
106
  :return: A deep copy of the transformed component with types and parameters persisted to it
109
107
  """
110
108
  propagated_component = dict(copy.deepcopy(declarative_component))
@@ -132,11 +130,7 @@ class ManifestComponentTransformer:
132
130
  # level take precedence
133
131
  current_parameters = dict(copy.deepcopy(parent_parameters))
134
132
  component_parameters = propagated_component.pop(PARAMETERS_STR, {})
135
- current_parameters = (
136
- {**component_parameters, **current_parameters}
137
- if use_parent_parameters
138
- else {**current_parameters, **component_parameters}
139
- )
133
+ current_parameters = {**current_parameters, **component_parameters}
140
134
 
141
135
  # Parameters should be applied to the current component fields with the existing field taking precedence over parameters if
142
136
  # both exist
@@ -151,10 +145,7 @@ class ManifestComponentTransformer:
151
145
  excluded_parameter = current_parameters.pop(field_name, None)
152
146
  parent_type_field_identifier = f"{propagated_component.get('type')}.{field_name}"
153
147
  propagated_component[field_name] = self.propagate_types_and_parameters(
154
- parent_type_field_identifier,
155
- field_value,
156
- current_parameters,
157
- use_parent_parameters=use_parent_parameters,
148
+ parent_type_field_identifier, field_value, current_parameters
158
149
  )
159
150
  if excluded_parameter:
160
151
  current_parameters[field_name] = excluded_parameter
@@ -167,10 +158,7 @@ class ManifestComponentTransformer:
167
158
  f"{propagated_component.get('type')}.{field_name}"
168
159
  )
169
160
  field_value[i] = self.propagate_types_and_parameters(
170
- parent_type_field_identifier,
171
- element,
172
- current_parameters,
173
- use_parent_parameters=use_parent_parameters,
161
+ parent_type_field_identifier, element, current_parameters
174
162
  )
175
163
  if excluded_parameter:
176
164
  current_parameters[field_name] = excluded_parameter