airbyte-cdk 6.42.0__py3-none-any.whl → 6.43.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +128 -1
- airbyte_cdk/sources/declarative/interpolation/macros.py +8 -4
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +80 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +189 -5
- airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +14 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/group_by_key.py +24 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +65 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +48 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +100 -31
- {airbyte_cdk-6.42.0.dist-info → airbyte_cdk-6.43.0.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.42.0.dist-info → airbyte_cdk-6.43.0.dev0.dist-info}/RECORD +17 -12
- {airbyte_cdk-6.42.0.dist-info → airbyte_cdk-6.43.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.42.0.dist-info → airbyte_cdk-6.43.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.42.0.dist-info → airbyte_cdk-6.43.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.42.0.dist-info → airbyte_cdk-6.43.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -1023,6 +1023,15 @@ definitions:
|
|
1023
1023
|
$parameters:
|
1024
1024
|
type: object
|
1025
1025
|
additionalProperties: true
|
1026
|
+
EmitPartialRecordMergeStrategy:
|
1027
|
+
title: Emit Partial Record
|
1028
|
+
description: Record merge strategy where, when multiple requests are needed to retrieve all properties, the properties are not consolidated back into a single record and are instead emitted as separate groups of properties. This strategy should only be used when records do not have a unique identifier like a primary key.
|
1029
|
+
required:
|
1030
|
+
- type
|
1031
|
+
properties:
|
1032
|
+
type:
|
1033
|
+
type: string
|
1034
|
+
enum: [EmitPartialRecordMergeStrategy]
|
1026
1035
|
JwtAuthenticator:
|
1027
1036
|
title: JWT Authenticator
|
1028
1037
|
description: Authenticator for requests using JWT authentication flow.
|
@@ -1731,6 +1740,30 @@ definitions:
|
|
1731
1740
|
$parameters:
|
1732
1741
|
type: object
|
1733
1742
|
additionalProperties: true
|
1743
|
+
GroupByKeyMergeStrategy:
|
1744
|
+
title: Group by Key
|
1745
|
+
description: Record merge strategy that combines records according to fields on the record.
|
1746
|
+
required:
|
1747
|
+
- type
|
1748
|
+
- key
|
1749
|
+
properties:
|
1750
|
+
type:
|
1751
|
+
type: string
|
1752
|
+
enum: [GroupByKeyMergeStrategy]
|
1753
|
+
key:
|
1754
|
+
title: Key
|
1755
|
+
description: The name of the field on the record whose value will be used to group properties that were retrieved through multiple API requests.
|
1756
|
+
anyOf:
|
1757
|
+
- type: string
|
1758
|
+
- type: array
|
1759
|
+
items:
|
1760
|
+
type: string
|
1761
|
+
examples:
|
1762
|
+
- "id"
|
1763
|
+
- ["parent_id", "end_date"]
|
1764
|
+
$parameters:
|
1765
|
+
type: object
|
1766
|
+
additionalProperties: true
|
1734
1767
|
SessionTokenAuthenticator:
|
1735
1768
|
type: object
|
1736
1769
|
required:
|
@@ -1950,7 +1983,9 @@ definitions:
|
|
1950
1983
|
- type: string
|
1951
1984
|
- type: object
|
1952
1985
|
additionalProperties:
|
1953
|
-
|
1986
|
+
anyOf:
|
1987
|
+
- type: string
|
1988
|
+
- "$ref": "#/definitions/QueryProperties"
|
1954
1989
|
interpolation_context:
|
1955
1990
|
- next_page_token
|
1956
1991
|
- stream_interval
|
@@ -2968,6 +3003,98 @@ definitions:
|
|
2968
3003
|
examples:
|
2969
3004
|
- id
|
2970
3005
|
- ["code", "type"]
|
3006
|
+
PropertiesFromEndpoint:
|
3007
|
+
title: Properties from Endpoint
|
3008
|
+
description: Defines the behavior for fetching the list of properties from an API that will be loaded into the requests to extract records.
|
3009
|
+
type: object
|
3010
|
+
required:
|
3011
|
+
- type
|
3012
|
+
- property_field_path
|
3013
|
+
- retriever
|
3014
|
+
properties:
|
3015
|
+
type:
|
3016
|
+
type: string
|
3017
|
+
enum: [PropertiesFromEndpoint]
|
3018
|
+
property_field_path:
|
3019
|
+
description: Describes the path to the field that should be extracted
|
3020
|
+
type: array
|
3021
|
+
items:
|
3022
|
+
type: string
|
3023
|
+
examples:
|
3024
|
+
- ["name"]
|
3025
|
+
interpolation_context:
|
3026
|
+
- config
|
3027
|
+
- parameters
|
3028
|
+
retriever:
|
3029
|
+
description: Retriever component that describes how to fetch the properties to query from a remote API endpoint.
|
3030
|
+
anyOf:
|
3031
|
+
- "$ref": "#/definitions/CustomRetriever"
|
3032
|
+
- "$ref": "#/definitions/SimpleRetriever"
|
3033
|
+
$parameters:
|
3034
|
+
type: object
|
3035
|
+
additionalProperties: true
|
3036
|
+
PropertyChunking:
|
3037
|
+
title: Property Chunking
|
3038
|
+
description: For APIs with restrictions on the number of properties that can be requested per request, property chunking can be applied to make multiple requests with a subset of the properties.
|
3039
|
+
type: object
|
3040
|
+
required:
|
3041
|
+
- type
|
3042
|
+
- property_limit_type
|
3043
|
+
properties:
|
3044
|
+
type:
|
3045
|
+
type: string
|
3046
|
+
enum: [PropertyChunking]
|
3047
|
+
property_limit_type:
|
3048
|
+
title: Property Limit Type
|
3049
|
+
description: The type used to determine the maximum number of properties per chunk
|
3050
|
+
enum:
|
3051
|
+
- characters
|
3052
|
+
- property_count
|
3053
|
+
property_limit:
|
3054
|
+
title: Property Limit
|
3055
|
+
description: The maximum amount of properties that can be retrieved per request according to the limit type.
|
3056
|
+
type: integer
|
3057
|
+
record_merge_strategy:
|
3058
|
+
title: Record Merge Strategy
|
3059
|
+
description: Dictates how records that require multiple requests to get all properties should be emitted to the destination
|
3060
|
+
anyOf:
|
3061
|
+
- "$ref": "#/definitions/EmitPartialRecordMergeStrategy"
|
3062
|
+
- "$ref": "#/definitions/GroupByKeyMergeStrategy"
|
3063
|
+
$parameters:
|
3064
|
+
type: object
|
3065
|
+
additionalProperties: true
|
3066
|
+
QueryProperties:
|
3067
|
+
title: Query Properties
|
3068
|
+
description: For APIs that require explicit specification of the properties to query for, this component specifies which property fields and how they are supplied to outbound requests.
|
3069
|
+
type: object
|
3070
|
+
required:
|
3071
|
+
- type
|
3072
|
+
- property_list
|
3073
|
+
properties:
|
3074
|
+
type:
|
3075
|
+
type: string
|
3076
|
+
enum: [QueryProperties]
|
3077
|
+
property_list:
|
3078
|
+
title: Property List
|
3079
|
+
description: The set of properties that will be queried for in the outbound request. This can either be statically defined or dynamic based on an API endpoint
|
3080
|
+
anyOf:
|
3081
|
+
- type: array
|
3082
|
+
items:
|
3083
|
+
type: string
|
3084
|
+
- "$ref": "#/definitions/PropertiesFromEndpoint"
|
3085
|
+
always_include_properties:
|
3086
|
+
title: Always Include Properties
|
3087
|
+
description: The list of properties that should be included in every set of properties when multiple chunks of properties are being requested.
|
3088
|
+
type: array
|
3089
|
+
items:
|
3090
|
+
type: string
|
3091
|
+
property_chunking:
|
3092
|
+
title: Property Chunking
|
3093
|
+
description: Defines how query properties will be grouped into smaller sets for APIs with limitations on the number of properties fetched per API request.
|
3094
|
+
"$ref": "#/definitions/PropertyChunking"
|
3095
|
+
$parameters:
|
3096
|
+
type: object
|
3097
|
+
additionalProperties: true
|
2971
3098
|
RecordFilter:
|
2972
3099
|
title: Record Filter
|
2973
3100
|
description: Filter applied on a list of records.
|
@@ -156,7 +156,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
|
|
156
156
|
|
157
157
|
|
158
158
|
def format_datetime(
|
159
|
-
dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None
|
159
|
+
dt: Union[str, datetime.datetime, int], format: str, input_format: Optional[str] = None
|
160
160
|
) -> str:
|
161
161
|
"""
|
162
162
|
Converts datetime to another format
|
@@ -170,9 +170,13 @@ def format_datetime(
|
|
170
170
|
"""
|
171
171
|
if isinstance(dt, datetime.datetime):
|
172
172
|
return dt.strftime(format)
|
173
|
-
|
174
|
-
|
175
|
-
|
173
|
+
|
174
|
+
if isinstance(dt, int):
|
175
|
+
dt_datetime = DatetimeParser().parse(dt, input_format if input_format else "%s")
|
176
|
+
else:
|
177
|
+
dt_datetime = (
|
178
|
+
datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
|
179
|
+
)
|
176
180
|
return DatetimeParser().format(dt=dt_datetime, format=format)
|
177
181
|
|
178
182
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
1
3
|
# generated by datamodel-codegen:
|
2
4
|
# filename: declarative_component_schema.yaml
|
3
5
|
|
@@ -343,6 +345,10 @@ class Clamping(BaseModel):
|
|
343
345
|
target_details: Optional[Dict[str, Any]] = None
|
344
346
|
|
345
347
|
|
348
|
+
class EmitPartialRecordMergeStrategy(BaseModel):
|
349
|
+
type: Literal["EmitPartialRecordMergeStrategy"]
|
350
|
+
|
351
|
+
|
346
352
|
class Algorithm(Enum):
|
347
353
|
HS256 = "HS256"
|
348
354
|
HS384 = "HS384"
|
@@ -716,6 +722,17 @@ class ExponentialBackoffStrategy(BaseModel):
|
|
716
722
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
717
723
|
|
718
724
|
|
725
|
+
class GroupByKeyMergeStrategy(BaseModel):
|
726
|
+
type: Literal["GroupByKeyMergeStrategy"]
|
727
|
+
key: Union[str, List[str]] = Field(
|
728
|
+
...,
|
729
|
+
description="The name of the field on the record whose value will be used to group properties that were retrieved through multiple API requests.",
|
730
|
+
examples=["id", ["parent_id", "end_date"]],
|
731
|
+
title="Key",
|
732
|
+
)
|
733
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
734
|
+
|
735
|
+
|
719
736
|
class SessionTokenRequestBearerAuthenticator(BaseModel):
|
720
737
|
type: Literal["Bearer"]
|
721
738
|
|
@@ -1187,6 +1204,33 @@ class PrimaryKey(BaseModel):
|
|
1187
1204
|
)
|
1188
1205
|
|
1189
1206
|
|
1207
|
+
class PropertyLimitType(Enum):
|
1208
|
+
characters = "characters"
|
1209
|
+
property_count = "property_count"
|
1210
|
+
|
1211
|
+
|
1212
|
+
class PropertyChunking(BaseModel):
|
1213
|
+
type: Literal["PropertyChunking"]
|
1214
|
+
property_limit_type: PropertyLimitType = Field(
|
1215
|
+
...,
|
1216
|
+
description="The type used to determine the maximum number of properties per chunk",
|
1217
|
+
title="Property Limit Type",
|
1218
|
+
)
|
1219
|
+
property_limit: Optional[int] = Field(
|
1220
|
+
None,
|
1221
|
+
description="The maximum amount of properties that can be retrieved per request according to the limit type.",
|
1222
|
+
title="Property Limit",
|
1223
|
+
)
|
1224
|
+
record_merge_strategy: Optional[
|
1225
|
+
Union[EmitPartialRecordMergeStrategy, GroupByKeyMergeStrategy]
|
1226
|
+
] = Field(
|
1227
|
+
None,
|
1228
|
+
description="Dictates how to records that require multiple requests to get all properties should be emitted to the destination",
|
1229
|
+
title="Record Merge Strategy",
|
1230
|
+
)
|
1231
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1232
|
+
|
1233
|
+
|
1190
1234
|
class RecordFilter(BaseModel):
|
1191
1235
|
type: Literal["RecordFilter"]
|
1192
1236
|
condition: Optional[str] = Field(
|
@@ -2174,7 +2218,7 @@ class HttpRequester(BaseModel):
|
|
2174
2218
|
examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}],
|
2175
2219
|
title="Request Headers",
|
2176
2220
|
)
|
2177
|
-
request_parameters: Optional[Union[str, Dict[str, str]]] = Field(
|
2221
|
+
request_parameters: Optional[Union[str, Dict[str, Union[str, Any]]]] = Field(
|
2178
2222
|
None,
|
2179
2223
|
description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.",
|
2180
2224
|
examples=[
|
@@ -2264,6 +2308,40 @@ class ParentStreamConfig(BaseModel):
|
|
2264
2308
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2265
2309
|
|
2266
2310
|
|
2311
|
+
class PropertiesFromEndpoint(BaseModel):
|
2312
|
+
type: Literal["PropertiesFromEndpoint"]
|
2313
|
+
property_field_path: List[str] = Field(
|
2314
|
+
...,
|
2315
|
+
description="Describes the path to the field that should be extracted",
|
2316
|
+
examples=[["name"]],
|
2317
|
+
)
|
2318
|
+
retriever: Union[CustomRetriever, SimpleRetriever] = Field(
|
2319
|
+
...,
|
2320
|
+
description="Requester component that describes how to fetch the properties to query from a remote API endpoint.",
|
2321
|
+
)
|
2322
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2323
|
+
|
2324
|
+
|
2325
|
+
class QueryProperties(BaseModel):
|
2326
|
+
type: Literal["QueryProperties"]
|
2327
|
+
property_list: Union[List[str], PropertiesFromEndpoint] = Field(
|
2328
|
+
...,
|
2329
|
+
description="The set of properties that will be queried for in the outbound request. This can either be statically defined or dynamic based on an API endpoint",
|
2330
|
+
title="Property List",
|
2331
|
+
)
|
2332
|
+
always_include_properties: Optional[List[str]] = Field(
|
2333
|
+
None,
|
2334
|
+
description="The list of properties that should be included in every set of properties when multiple chunks of properties are being requested.",
|
2335
|
+
title="Always Include Properties",
|
2336
|
+
)
|
2337
|
+
property_chunking: Optional[PropertyChunking] = Field(
|
2338
|
+
None,
|
2339
|
+
description="Defines how query properties will be grouped into smaller sets for APIs with limitations on the number of properties fetched per API request.",
|
2340
|
+
title="Property Chunking",
|
2341
|
+
)
|
2342
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2343
|
+
|
2344
|
+
|
2267
2345
|
class StateDelegatingStream(BaseModel):
|
2268
2346
|
type: Literal["StateDelegatingStream"]
|
2269
2347
|
name: str = Field(..., description="The stream name.", example=["Users"], title="Name")
|
@@ -2512,5 +2590,6 @@ DeclarativeStream.update_forward_refs()
|
|
2512
2590
|
SessionTokenAuthenticator.update_forward_refs()
|
2513
2591
|
DynamicSchemaLoader.update_forward_refs()
|
2514
2592
|
ParentStreamConfig.update_forward_refs()
|
2593
|
+
PropertiesFromEndpoint.update_forward_refs()
|
2515
2594
|
SimpleRetriever.update_forward_refs()
|
2516
2595
|
AsyncRetriever.update_forward_refs()
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
from __future__ import annotations
|
@@ -227,6 +227,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
227
227
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
228
228
|
FlattenFields as FlattenFieldsModel,
|
229
229
|
)
|
230
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
|
+
GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
|
232
|
+
)
|
230
233
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
234
|
GroupingPartitionRouter as GroupingPartitionRouterModel,
|
232
235
|
)
|
@@ -317,6 +320,18 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
317
320
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
318
321
|
ParentStreamConfig as ParentStreamConfigModel,
|
319
322
|
)
|
323
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
324
|
+
PropertiesFromEndpoint as PropertiesFromEndpointModel,
|
325
|
+
)
|
326
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
327
|
+
PropertyChunking as PropertyChunkingModel,
|
328
|
+
)
|
329
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
330
|
+
PropertyLimitType as PropertyLimitTypeModel,
|
331
|
+
)
|
332
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
333
|
+
QueryProperties as QueryPropertiesModel,
|
334
|
+
)
|
320
335
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
321
336
|
Rate as RateModel,
|
322
337
|
)
|
@@ -425,6 +440,15 @@ from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
|
|
425
440
|
PageIncrement,
|
426
441
|
StopConditionPaginationStrategyDecorator,
|
427
442
|
)
|
443
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties import (
|
444
|
+
GroupByKey,
|
445
|
+
PropertiesFromEndpoint,
|
446
|
+
PropertyChunking,
|
447
|
+
QueryProperties,
|
448
|
+
)
|
449
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
|
450
|
+
PropertyLimitType,
|
451
|
+
)
|
428
452
|
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
|
429
453
|
from airbyte_cdk.sources.declarative.requesters.request_options import (
|
430
454
|
DatetimeBasedRequestOptionsProvider,
|
@@ -588,6 +612,7 @@ class ModelToComponentFactory:
|
|
588
612
|
ResponseToFileExtractorModel: self.create_response_to_file_extractor,
|
589
613
|
ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
|
590
614
|
SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
|
615
|
+
GroupByKeyMergeStrategyModel: self.create_group_by_key,
|
591
616
|
HttpRequesterModel: self.create_http_requester,
|
592
617
|
HttpResponseFilterModel: self.create_http_response_filter,
|
593
618
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
@@ -617,6 +642,9 @@ class ModelToComponentFactory:
|
|
617
642
|
OffsetIncrementModel: self.create_offset_increment,
|
618
643
|
PageIncrementModel: self.create_page_increment,
|
619
644
|
ParentStreamConfigModel: self.create_parent_stream_config,
|
645
|
+
PropertiesFromEndpointModel: self.create_properties_from_endpoint,
|
646
|
+
PropertyChunkingModel: self.create_property_chunking,
|
647
|
+
QueryPropertiesModel: self.create_query_properties,
|
620
648
|
RecordFilterModel: self.create_record_filter,
|
621
649
|
RecordSelectorModel: self.create_record_selector,
|
622
650
|
RemoveFieldsModel: self.create_remove_fields,
|
@@ -2047,8 +2075,8 @@ class ModelToComponentFactory:
|
|
2047
2075
|
parameters=model.parameters or {},
|
2048
2076
|
)
|
2049
2077
|
|
2078
|
+
@staticmethod
|
2050
2079
|
def create_response_to_file_extractor(
|
2051
|
-
self,
|
2052
2080
|
model: ResponseToFileExtractorModel,
|
2053
2081
|
**kwargs: Any,
|
2054
2082
|
) -> ResponseToFileExtractor:
|
@@ -2062,11 +2090,16 @@ class ModelToComponentFactory:
|
|
2062
2090
|
factor=model.factor or 5, parameters=model.parameters or {}, config=config
|
2063
2091
|
)
|
2064
2092
|
|
2093
|
+
@staticmethod
def create_group_by_key(
    model: GroupByKeyMergeStrategyModel, config: Config, **kwargs: Any
) -> GroupByKey:
    """
    Build the runtime GroupByKey merge strategy from its declarative model.

    :param model: Parsed GroupByKeyMergeStrategy definition; supplies the key and $parameters.
    :param config: The connector configuration, handed to the component as-is.
    :param kwargs: Accepted and ignored. create_property_chunking forwards its own keyword
        arguments when it builds the record merge strategy, so this factory must tolerate
        extra keywords the same way the other create_* methods do.
    :return: A GroupByKey configured with the model's key.
    """
    return GroupByKey(model.key, config=config, parameters=model.parameters or {})
|
2096
|
+
|
2065
2097
|
def create_http_requester(
|
2066
2098
|
self,
|
2067
2099
|
model: HttpRequesterModel,
|
2068
2100
|
config: Config,
|
2069
2101
|
decoder: Decoder = JsonDecoder(parameters={}),
|
2102
|
+
query_properties_key: Optional[str] = None,
|
2070
2103
|
*,
|
2071
2104
|
name: str,
|
2072
2105
|
) -> HttpRequester:
|
@@ -2099,6 +2132,7 @@ class ModelToComponentFactory:
|
|
2099
2132
|
request_body_json=model.request_body_json,
|
2100
2133
|
request_headers=model.request_headers,
|
2101
2134
|
request_parameters=model.request_parameters,
|
2135
|
+
query_properties_key=query_properties_key,
|
2102
2136
|
config=config,
|
2103
2137
|
parameters=model.parameters or {},
|
2104
2138
|
)
|
@@ -2566,6 +2600,79 @@ class ModelToComponentFactory:
|
|
2566
2600
|
lazy_read_pointer=model_lazy_read_pointer,
|
2567
2601
|
)
|
2568
2602
|
|
2603
|
+
def create_properties_from_endpoint(
    self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
) -> PropertiesFromEndpoint:
    """
    Build a PropertiesFromEndpoint component from its declarative model.

    The nested retriever is created under the fixed name "property_retriever" with no primary
    key, no stream slicer and an empty list of transformations.

    :param model: Parsed PropertiesFromEndpoint definition.
    :param config: The connector configuration.
    :param kwargs: Unused; accepted for parity with the other factory methods.
    :return: The component wrapping the created retriever and the configured field path.
    """
    property_retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="property_retriever",
        primary_key=None,
        stream_slicer=None,
        transformations=[],
    )
    component_parameters = model.parameters or {}
    return PropertiesFromEndpoint(
        property_field_path=model.property_field_path,
        retriever=property_retriever,
        config=config,
        parameters=component_parameters,
    )
|
2621
|
+
|
2622
|
+
def create_property_chunking(
|
2623
|
+
self, model: PropertyChunkingModel, config: Config, **kwargs: Any
|
2624
|
+
) -> PropertyChunking:
|
2625
|
+
record_merge_strategy = (
|
2626
|
+
self._create_component_from_model(
|
2627
|
+
model=model.record_merge_strategy, config=config, **kwargs
|
2628
|
+
)
|
2629
|
+
if model.record_merge_strategy
|
2630
|
+
else None
|
2631
|
+
)
|
2632
|
+
|
2633
|
+
property_limit_type: PropertyLimitType
|
2634
|
+
match model.property_limit_type:
|
2635
|
+
case PropertyLimitTypeModel.property_count:
|
2636
|
+
property_limit_type = PropertyLimitType.property_count
|
2637
|
+
case PropertyLimitTypeModel.characters:
|
2638
|
+
property_limit_type = PropertyLimitType.characters
|
2639
|
+
case _:
|
2640
|
+
raise ValueError(f"Invalid PropertyLimitType {property_limit_type}")
|
2641
|
+
|
2642
|
+
return PropertyChunking(
|
2643
|
+
property_limit_type=property_limit_type,
|
2644
|
+
property_limit=model.property_limit,
|
2645
|
+
record_merge_strategy=record_merge_strategy,
|
2646
|
+
config=config,
|
2647
|
+
parameters=model.parameters or {},
|
2648
|
+
)
|
2649
|
+
|
2650
|
+
def create_query_properties(
    self, model: QueryPropertiesModel, config: Config, **kwargs: Any
) -> QueryProperties:
    """
    Build a QueryProperties component from its declarative model.

    A property_list given as a plain list is used directly; any other value is treated as a
    component model and instantiated through the factory. property_chunking is only
    instantiated when the model defines it.

    :param model: Parsed QueryProperties definition.
    :param config: The connector configuration.
    :param kwargs: Forwarded to the nested factory calls.
    :return: The runtime QueryProperties component.
    """
    declared_properties = model.property_list
    property_list = (
        declared_properties
        if isinstance(declared_properties, list)
        else self._create_component_from_model(
            model=declared_properties, config=config, **kwargs
        )
    )

    property_chunking = None
    if model.property_chunking:
        property_chunking = self._create_component_from_model(
            model=model.property_chunking, config=config, **kwargs
        )

    return QueryProperties(
        property_list=property_list,
        always_include_properties=model.always_include_properties,
        property_chunking=property_chunking,
        config=config,
        parameters=model.parameters or {},
    )
|
2675
|
+
|
2569
2676
|
@staticmethod
|
2570
2677
|
def create_record_filter(
|
2571
2678
|
model: RecordFilterModel, config: Config, **kwargs: Any
|
@@ -2718,9 +2825,6 @@ class ModelToComponentFactory:
|
|
2718
2825
|
if model.decoder
|
2719
2826
|
else JsonDecoder(parameters={})
|
2720
2827
|
)
|
2721
|
-
requester = self._create_component_from_model(
|
2722
|
-
model=model.requester, decoder=decoder, config=config, name=name
|
2723
|
-
)
|
2724
2828
|
record_selector = self._create_component_from_model(
|
2725
2829
|
model=model.record_selector,
|
2726
2830
|
name=name,
|
@@ -2729,6 +2833,53 @@ class ModelToComponentFactory:
|
|
2729
2833
|
transformations=transformations,
|
2730
2834
|
client_side_incremental_sync=client_side_incremental_sync,
|
2731
2835
|
)
|
2836
|
+
|
2837
|
+
query_properties: Optional[QueryProperties] = None
|
2838
|
+
query_properties_key: Optional[str] = None
|
2839
|
+
if (
|
2840
|
+
hasattr(model.requester, "request_parameters")
|
2841
|
+
and model.requester.request_parameters
|
2842
|
+
and isinstance(model.requester.request_parameters, Mapping)
|
2843
|
+
):
|
2844
|
+
query_properties_definitions = []
|
2845
|
+
for key, request_parameter in model.requester.request_parameters.items():
|
2846
|
+
if (
|
2847
|
+
isinstance(request_parameter, Mapping)
|
2848
|
+
and request_parameter.get("type") == "QueryProperties"
|
2849
|
+
):
|
2850
|
+
query_properties_key = key
|
2851
|
+
query_properties_definitions.append(request_parameter)
|
2852
|
+
elif not isinstance(request_parameter, str):
|
2853
|
+
raise ValueError(
|
2854
|
+
f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}"
|
2855
|
+
)
|
2856
|
+
|
2857
|
+
if len(query_properties_definitions) > 1:
|
2858
|
+
raise ValueError(
|
2859
|
+
f"request_parameters should only define one QueryProperties field, but found {len(query_properties_definitions)}"
|
2860
|
+
)
|
2861
|
+
|
2862
|
+
if len(query_properties_definitions) == 1:
|
2863
|
+
query_properties = self.create_component(
|
2864
|
+
model_type=QueryPropertiesModel,
|
2865
|
+
component_definition=query_properties_definitions[0],
|
2866
|
+
config=config,
|
2867
|
+
)
|
2868
|
+
|
2869
|
+
# Removes QueryProperties components from the interpolated mappings because it will be resolved in
|
2870
|
+
# the provider from the slice directly instead of through jinja interpolation
|
2871
|
+
if isinstance(model.requester.request_parameters, Mapping):
|
2872
|
+
model.requester.request_parameters = self._remove_query_properties(
|
2873
|
+
model.requester.request_parameters
|
2874
|
+
)
|
2875
|
+
|
2876
|
+
requester = self._create_component_from_model(
|
2877
|
+
model=model.requester,
|
2878
|
+
decoder=decoder,
|
2879
|
+
query_properties_key=query_properties_key,
|
2880
|
+
config=config,
|
2881
|
+
name=name,
|
2882
|
+
)
|
2732
2883
|
url_base = (
|
2733
2884
|
model.requester.url_base
|
2734
2885
|
if hasattr(model.requester, "url_base")
|
@@ -2834,9 +2985,42 @@ class ModelToComponentFactory:
|
|
2834
2985
|
cursor=cursor,
|
2835
2986
|
config=config,
|
2836
2987
|
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
|
2988
|
+
additional_query_properties=query_properties,
|
2837
2989
|
parameters=model.parameters or {},
|
2838
2990
|
)
|
2839
2991
|
|
2992
|
+
@staticmethod
def _remove_query_properties(
    request_parameters: Mapping[str, Union[Any, str]],
) -> Mapping[str, Union[Any, str]]:
    """
    Return a new mapping without the entries that are QueryProperties definitions.

    An entry is dropped only when its value is a mapping whose "type" equals "QueryProperties";
    every other entry (strings, other mappings, anything else) is kept unchanged. The input
    mapping is not modified.

    :param request_parameters: The request parameters as declared on the requester model.
    :return: A dict holding the remaining entries, in the original order.
    """
    return {
        parameter_field: request_parameter
        for parameter_field, request_parameter in request_parameters.items()
        if not isinstance(request_parameter, Mapping)
        or request_parameter.get("type") != "QueryProperties"
    }
|
3002
|
+
|
3003
|
+
@staticmethod
def _translate_query_properties_to_interpolated_strings(
    request_parameters: Mapping[str, Union[Any, str]],
) -> Mapping[str, Union[Any, str]]:
    """
    Rewrite QueryProperties entries of a request-parameter mapping as interpolation strings.

    For each entry:
      * a mapping with "type" == "QueryProperties" and a truthy "inject_into" is replaced by the
        interpolated string that reads the properties from the stream partition,
      * a mapping with "type" == "QueryProperties" and no truthy "inject_into" is left out,
      * anything else is copied over unchanged.

    :param request_parameters: The request parameters as declared on the requester model.
    :return: A new dict; the input mapping is not modified.
    """
    # todo blai: remove this since unused
    new_request_parameters: dict[str, Union[Any, str]] = {}
    for key, request_parameter in request_parameters.items():
        is_query_properties = (
            isinstance(request_parameter, Mapping)
            and request_parameter.get("type") == "QueryProperties"
        )
        if not is_query_properties:
            new_request_parameters[key] = request_parameter
        elif request_parameter.get("inject_into"):
            new_request_parameters[key] = (
                "{{ stream_partition.extra_fields['query_properties'] }}"
            )
        # QueryProperties definitions without `inject_into` are intentionally not copied.
    return new_request_parameters
|
3023
|
+
|
2840
3024
|
def create_state_delegating_stream(
|
2841
3025
|
self,
|
2842
3026
|
model: StateDelegatingStreamModel,
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.group_by_key import GroupByKey
|
4
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.properties_from_endpoint import (
|
5
|
+
PropertiesFromEndpoint,
|
6
|
+
)
|
7
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
|
8
|
+
PropertyChunking,
|
9
|
+
)
|
10
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.query_properties import (
|
11
|
+
QueryProperties,
|
12
|
+
)
|
13
|
+
|
14
|
+
__all__ = ["GroupByKey", "PropertiesFromEndpoint", "PropertyChunking", "QueryProperties"]
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from typing import Any, List, Mapping, Union
|
5
|
+
|
6
|
+
from airbyte_cdk.sources.types import Config, Record
|
7
|
+
|
8
|
+
|
9
|
+
@dataclass
class GroupByKey:
    """
    Record merge strategy that derives a grouping key from one or more fields of a record.

    `key` may be a single field name or a list of field names. The group key of a record is
    the string form of each field's value, joined with commas in the configured order.
    """

    key: Union[str, List[str]]
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Normalize the configured key so the lookup below always iterates over a list.
        if isinstance(self.key, str):
            self._keys = [self.key]
        else:
            self._keys = self.key

    def get_group_key(self, record: Record) -> str:
        """
        Return the comma-joined string values of the configured key fields for `record`.

        Values are read with `record.data.get`, so a field that is absent contributes the
        string "None".
        """
        return ",".join(str(record.data.get(field_name)) for field_name in self._keys)
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from typing import Any, Iterable, List, Mapping, Optional, Union
|
5
|
+
|
6
|
+
import dpath
|
7
|
+
|
8
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
9
|
+
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
10
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class PropertiesFromEndpoint:
    """
    Supplies the properties to query by reading them from an API endpoint.

    Every record produced by `retriever` is expected to contain the property name at
    `property_field_path`; the path segments are interpolated strings evaluated against the
    connector config.
    """

    property_field_path: List[str]
    retriever: Retriever
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Wrap each path segment so it can reference config / parameters through interpolation.
        self._property_field_path = [
            InterpolatedString(string=property_field, parameters=parameters)
            for property_field in self.property_field_path
        ]

    def get_properties_from_endpoint(self, stream_slice: Optional[StreamSlice]) -> Iterable[str]:
        """
        Yield the value found at `property_field_path` in each record read from the retriever.

        :param stream_slice: Slice passed through to the retriever's `read_records`.
        :return: A generator of the extracted values, one per record.
        """
        response_properties = self.retriever.read_records(
            records_schema={}, stream_slice=stream_slice
        )
        # The resolved path depends only on the config, so evaluate it once instead of once
        # per record.
        path = [
            node.eval(self.config) if not isinstance(node, str) else node
            for node in self._property_field_path
        ]
        for property_obj in response_properties:
            # NOTE(review): the value at `path` is presumably the property name (a str), in
            # line with the declared return type; dpath's own typing is broader.
            yield dpath.get(property_obj, path)  # type: ignore
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from enum import Enum
|
5
|
+
from typing import Any, Iterable, List, Mapping, Optional
|
6
|
+
|
7
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties import GroupByKey
|
8
|
+
from airbyte_cdk.sources.types import Config, Record
|
9
|
+
|
10
|
+
|
11
|
+
class PropertyLimitType(Enum):
    """
    Unit in which `PropertyChunking.property_limit` is measured.

    `characters` counts the length of each property name; `property_count` counts each
    property as one.
    """

    characters = "characters"
    property_count = "property_count"


@dataclass
class PropertyChunking:
    """
    Splits the properties to request into chunks that respect a per-request limit.

    When no record merge strategy is supplied, records are grouped with a GroupByKey on the
    "id" field.
    """

    property_limit_type: PropertyLimitType
    property_limit: Optional[int]
    record_merge_strategy: Optional[
        GroupByKey
    ]  # This should eventually be some sort of interface or type
    parameters: InitVar[Mapping[str, Any]]
    config: Config

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        self._record_merge_strategy = self.record_merge_strategy or GroupByKey(
            key="id", config=self.config, parameters=parameters
        )

    def get_request_property_chunks(
        self, property_fields: Iterable[str], always_include_properties: Optional[List[str]]
    ) -> Iterable[List[str]]:
        """
        Yield the lists of properties to send, one list per request.

        Without a `property_limit` a single chunk is yielded: all `property_fields` followed by
        `always_include_properties`. With a limit, every chunk starts with
        `always_include_properties` (which do not count toward the limit) and is then filled
        with properties until adding the next one would exceed the limit.

        :param property_fields: The properties to distribute over chunks.
        :param always_include_properties: Properties to add to every chunk, if any.
        :return: A generator of property lists.
        """
        if not self.property_limit:
            single_property_chunk = list(property_fields)
            if always_include_properties:
                single_property_chunk.extend(always_include_properties)
            yield single_property_chunk
            return

        base_chunk = list(always_include_properties) if always_include_properties else []
        current_chunk = list(base_chunk)
        chunk_size = 0
        for property_field in property_fields:
            property_field_size = (
                len(property_field)
                if self.property_limit_type == PropertyLimitType.characters
                else 1
            )
            # Flush only when the chunk already holds at least one requested property. A single
            # property that exceeds the limit on its own must not cause a chunk that carries no
            # requested property (and therefore a pointless request) to be emitted first.
            chunk_has_properties = len(current_chunk) > len(base_chunk)
            if chunk_has_properties and chunk_size + property_field_size > self.property_limit:
                yield current_chunk
                current_chunk = list(base_chunk)
                chunk_size = 0
            current_chunk.append(property_field)
            chunk_size += property_field_size
        yield current_chunk

    def get_merge_key(self, record: Record) -> str:
        """Return the key used to group `record`, as computed by the record merge strategy."""
        return self._record_merge_strategy.get_group_key(record=record)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass
|
4
|
+
from typing import Any, Iterable, List, Mapping, Optional, Union
|
5
|
+
|
6
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties import (
|
7
|
+
PropertiesFromEndpoint,
|
8
|
+
PropertyChunking,
|
9
|
+
)
|
10
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class QueryProperties:
    """
    Describe which properties are requested from an API and how they are grouped into chunks.

    Attributes:
        property_list: Either a static list of property names or a PropertiesFromEndpoint that
            supplies them.
        always_include_properties: Property names handed to the chunking component so they can be
            added to the chunks it builds. Not consulted when `property_chunking` is unset.
        property_chunking: Optional component that splits the properties into several chunks.
        config: Connector configuration.
        parameters: Init-only parameters.
    """

    property_list: Optional[Union[List[str], PropertiesFromEndpoint]]
    always_include_properties: Optional[List[str]]
    property_chunking: Optional[PropertyChunking]
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def get_request_property_chunks(
        self, stream_slice: Optional[StreamSlice] = None
    ) -> Iterable[List[str]]:
        """
        Yield the groups of property names to request.

        :param stream_slice: The slice forwarded to the endpoint lookup when properties are dynamic
        :return: The chunks built by `property_chunking`, or one chunk with every property
        """
        source = self.property_list
        fields: Union[Iterable[str], List[str]]
        if isinstance(source, PropertiesFromEndpoint):
            fields = source.get_properties_from_endpoint(stream_slice=stream_slice)
        else:
            fields = source or []

        # Without a chunking component everything is requested as one group.
        if not self.property_chunking:
            yield list(fields)
            return

        yield from self.property_chunking.get_request_property_chunks(
            property_fields=fields,
            always_include_properties=self.always_include_properties,
        )

    def has_multiple_chunks(self, stream_slice: Optional[StreamSlice]) -> bool:
        """
        Tell whether the properties are spread over two or more chunks.

        A fresh chunk iterator is created on every call, so a PropertiesFromEndpoint property
        list is queried again here.

        :param stream_slice: The slice forwarded when building the chunks
        :return: True when a second chunk exists, False otherwise
        """
        chunks = iter(self.get_request_property_chunks(stream_slice=stream_slice))
        exhausted = object()
        # Pull at most two chunks; running dry before the second means a single request.
        for _ in range(2):
            if next(chunks, exhausted) is exhausted:
                return False
        return True
|
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
|
-
from typing import Any, Mapping, MutableMapping, Optional, Union
|
6
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
|
9
9
|
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
|
@@ -40,6 +40,7 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
|
|
40
40
|
request_headers: Optional[RequestInput] = None
|
41
41
|
request_body_data: Optional[RequestInput] = None
|
42
42
|
request_body_json: Optional[NestedMapping] = None
|
43
|
+
query_properties_key: Optional[str] = None
|
43
44
|
|
44
45
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
45
46
|
if self.request_parameters is None:
|
@@ -83,6 +84,28 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider):
|
|
83
84
|
valid_value_types=ValidRequestTypes,
|
84
85
|
)
|
85
86
|
if isinstance(interpolated_value, dict):
|
87
|
+
if self.query_properties_key:
|
88
|
+
if not stream_slice:
|
89
|
+
raise ValueError(
|
90
|
+
"stream_slice should not be None if query properties in requests is enabled. Please contact Airbyte Support"
|
91
|
+
)
|
92
|
+
elif (
|
93
|
+
"query_properties" not in stream_slice.extra_fields
|
94
|
+
or stream_slice.extra_fields.get("query_properties") is None
|
95
|
+
):
|
96
|
+
raise ValueError(
|
97
|
+
"QueryProperties component is defined but stream_partition does not contain query_properties. Please contact Airbyte Support"
|
98
|
+
)
|
99
|
+
elif not isinstance(stream_slice.extra_fields.get("query_properties"), List):
|
100
|
+
raise ValueError(
|
101
|
+
"QueryProperties component is defined but stream_slice.extra_fields.query_properties is not a List. Please contact Airbyte Support"
|
102
|
+
)
|
103
|
+
interpolated_value = {
|
104
|
+
**interpolated_value,
|
105
|
+
self.query_properties_key: ",".join(
|
106
|
+
stream_slice.extra_fields.get("query_properties") # type: ignore # Earlier type checks validate query_properties type
|
107
|
+
),
|
108
|
+
}
|
86
109
|
return interpolated_value
|
87
110
|
return {}
|
88
111
|
|
@@ -1,8 +1,9 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
import json
|
6
|
+
from collections import defaultdict
|
6
7
|
from dataclasses import InitVar, dataclass, field
|
7
8
|
from functools import partial
|
8
9
|
from itertools import islice
|
@@ -12,6 +13,7 @@ from typing import (
|
|
12
13
|
Iterable,
|
13
14
|
List,
|
14
15
|
Mapping,
|
16
|
+
MutableMapping,
|
15
17
|
Optional,
|
16
18
|
Set,
|
17
19
|
Tuple,
|
@@ -31,6 +33,7 @@ from airbyte_cdk.sources.declarative.partition_routers.single_partition_router i
|
|
31
33
|
)
|
32
34
|
from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination
|
33
35
|
from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
|
36
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties import QueryProperties
|
34
37
|
from airbyte_cdk.sources.declarative.requesters.request_options import (
|
35
38
|
DefaultRequestOptionsProvider,
|
36
39
|
RequestOptionsProvider,
|
@@ -88,6 +91,7 @@ class SimpleRetriever(Retriever):
|
|
88
91
|
)
|
89
92
|
cursor: Optional[DeclarativeCursor] = None
|
90
93
|
ignore_stream_slicer_parameters_on_paginated_requests: bool = False
|
94
|
+
additional_query_properties: Optional[QueryProperties] = None
|
91
95
|
|
92
96
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
93
97
|
self._paginator = self.paginator or NoPagination(parameters=parameters)
|
@@ -445,43 +449,108 @@ class SimpleRetriever(Retriever):
|
|
445
449
|
:param stream_slice: The stream slice to read data for
|
446
450
|
:return: The records read from the API source
|
447
451
|
"""
|
448
|
-
_slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check
|
449
452
|
|
453
|
+
if self.additional_query_properties:
|
454
|
+
property_chunks = list(
|
455
|
+
self.additional_query_properties.get_request_property_chunks(
|
456
|
+
stream_slice=stream_slice
|
457
|
+
)
|
458
|
+
)
|
459
|
+
has_multiple_chunks = self.additional_query_properties.has_multiple_chunks(
|
460
|
+
stream_slice=stream_slice
|
461
|
+
)
|
462
|
+
else:
|
463
|
+
property_chunks = [[""]]
|
464
|
+
has_multiple_chunks = False
|
465
|
+
merged_records: MutableMapping[str, Any] = defaultdict(dict)
|
466
|
+
_slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check
|
450
467
|
most_recent_record_from_slice = None
|
451
|
-
record_generator = partial(
|
452
|
-
self._parse_records,
|
453
|
-
stream_slice=stream_slice,
|
454
|
-
stream_state=self.state or {},
|
455
|
-
records_schema=records_schema,
|
456
|
-
)
|
457
|
-
|
458
|
-
if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
|
459
|
-
stream_state = self.state
|
460
|
-
|
461
|
-
# Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
|
462
|
-
# fetch more records. The platform deletes stream state for full refresh streams before starting a
|
463
|
-
# new job, so we don't need to worry about this value existing for the initial attempt
|
464
|
-
if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
|
465
|
-
return
|
466
468
|
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
469
|
+
if self.additional_query_properties:
|
470
|
+
for properties in property_chunks:
|
471
|
+
_slice = StreamSlice(
|
472
|
+
partition=_slice.partition or {},
|
473
|
+
cursor_slice=_slice.cursor_slice or {},
|
474
|
+
extra_fields={"query_properties": properties},
|
475
|
+
) # None-check
|
476
|
+
|
477
|
+
record_generator = partial(
|
478
|
+
self._parse_records,
|
479
|
+
stream_slice=_slice,
|
480
|
+
stream_state=self.state or {},
|
481
|
+
records_schema=records_schema,
|
479
482
|
)
|
480
|
-
yield stream_data
|
481
483
|
|
484
|
+
for stream_data in self._read_pages(record_generator, self.state, _slice):
|
485
|
+
current_record = self._extract_record(stream_data, _slice)
|
486
|
+
if self.cursor and current_record:
|
487
|
+
self.cursor.observe(_slice, current_record)
|
488
|
+
|
489
|
+
# Latest record read, not necessarily within slice boundaries.
|
490
|
+
# TODO Remove once all custom components implement `observe` method.
|
491
|
+
# https://github.com/airbytehq/airbyte-internal-issues/issues/6955
|
492
|
+
most_recent_record_from_slice = self._get_most_recent_record(
|
493
|
+
most_recent_record_from_slice, current_record, _slice
|
494
|
+
)
|
495
|
+
|
496
|
+
# Record merging should only be done if there are multiple property chunks. Otherwise,
|
497
|
+
# yielding immediately is more efficient so records can be emitted immediately
|
498
|
+
if (
|
499
|
+
has_multiple_chunks
|
500
|
+
and self.additional_query_properties.property_chunking
|
501
|
+
and current_record
|
502
|
+
):
|
503
|
+
merge_key = (
|
504
|
+
self.additional_query_properties.property_chunking.get_merge_key(
|
505
|
+
current_record
|
506
|
+
)
|
507
|
+
)
|
508
|
+
merged_records[merge_key].update(current_record)
|
509
|
+
else:
|
510
|
+
yield stream_data
|
482
511
|
if self.cursor:
|
483
512
|
self.cursor.close_slice(_slice, most_recent_record_from_slice)
|
484
|
-
|
513
|
+
|
514
|
+
if has_multiple_chunks:
|
515
|
+
yield from merged_records.values()
|
516
|
+
else:
|
517
|
+
_slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check
|
518
|
+
|
519
|
+
most_recent_record_from_slice = None
|
520
|
+
record_generator = partial(
|
521
|
+
self._parse_records,
|
522
|
+
stream_slice=stream_slice,
|
523
|
+
stream_state=self.state or {},
|
524
|
+
records_schema=records_schema,
|
525
|
+
)
|
526
|
+
|
527
|
+
if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
|
528
|
+
stream_state = self.state
|
529
|
+
|
530
|
+
# Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
|
531
|
+
# fetch more records. The platform deletes stream state for full refresh streams before starting a
|
532
|
+
# new job, so we don't need to worry about this value existing for the initial attempt
|
533
|
+
if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
|
534
|
+
return
|
535
|
+
|
536
|
+
yield from self._read_single_page(record_generator, stream_state, _slice)
|
537
|
+
else:
|
538
|
+
for stream_data in self._read_pages(record_generator, self.state, _slice):
|
539
|
+
current_record = self._extract_record(stream_data, _slice)
|
540
|
+
if self.cursor and current_record:
|
541
|
+
self.cursor.observe(_slice, current_record)
|
542
|
+
|
543
|
+
# Latest record read, not necessarily within slice boundaries.
|
544
|
+
# TODO Remove once all custom components implement `observe` method.
|
545
|
+
# https://github.com/airbytehq/airbyte-internal-issues/issues/6955
|
546
|
+
most_recent_record_from_slice = self._get_most_recent_record(
|
547
|
+
most_recent_record_from_slice, current_record, _slice
|
548
|
+
)
|
549
|
+
yield stream_data
|
550
|
+
|
551
|
+
if self.cursor:
|
552
|
+
self.cursor.close_slice(_slice, most_recent_record_from_slice)
|
553
|
+
return
|
485
554
|
|
486
555
|
def _get_most_recent_record(
|
487
556
|
self,
|
@@ -71,7 +71,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=uhy0dRkA
|
|
71
71
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
72
72
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
|
73
73
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
74
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256
|
74
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=-7gyAzojFjdkQ0IyKUwYyT91uwSSRO2pstlVjHyEaGc,158041
|
75
75
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
76
76
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=dCRlddBUSaJmBNBz1pSO1r2rTw8AP5d2_vlmIeGs2gg,10767
|
77
77
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
|
@@ -108,19 +108,19 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha
|
|
108
108
|
airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkHqGlfa87G6VYMtBAQWin7ECKpfMdrDcg0JO5_rhc,3212
|
109
109
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
|
110
110
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=UQeuS4Vpyp4hlOn-R3tRyeBX0e9IoV6jQ6gH-Jz8lY0,7182
|
111
|
-
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=
|
111
|
+
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=CF2L_r73TCd1Wssq9_tjFPb4c872QM8oW6NL-DIR0h4,5226
|
112
112
|
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=kFfyVpX-babuF4YzUHm7xfx_3M4M-GwJsrOWuez1z9Q,18894
|
113
113
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
114
114
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
115
115
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
116
116
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
117
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
117
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=NYVuwCA-ot13weNOCc0_0DTjhiMUR8eHsRFdtrANnRo,111561
|
118
118
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
119
119
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
|
120
120
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
121
121
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
122
122
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
123
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
123
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=64HO1wnBw5NjitXgYn3e-GVfum0zrQrpmLGlzC3mLJg,157514
|
124
124
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
|
125
125
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
126
126
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -156,13 +156,18 @@ airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_incremen
|
|
156
156
|
airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=Z2i6a-oKMmOTxHxsTVSnyaShkJ3u8xZw1xIJdx2yxss,2731
|
157
157
|
airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=ZBshGQNr5Bb_V8dqnWRISqdXFcjm1CKIXnlfbRhNl8g,1308
|
158
158
|
airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=LoKXdUbSgHEtSwtA8DFrnX6SpQbRVVwreY8NguTKTcI,2229
|
159
|
+
airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py,sha256=6Da7wsYQxzWWgpkisj47YbMPtj1sgg1b_60qTspO_Pw,604
|
160
|
+
airbyte_cdk/sources/declarative/requesters/query_properties/group_by_key.py,sha256=7uVXFwzR5Q5ngkwAtW8x3QBK963Ub1Qpo2vapF8IIc0,659
|
161
|
+
airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py,sha256=Rz4CViWqDGKCCZ8eYbb5yVlF-ElXmBmMZqhYglFxGqA,1395
|
162
|
+
airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py,sha256=o9Jq-7mBC8ahPii5EngA0h4gBB8UV0xxZbifG9otrIk,2254
|
163
|
+
airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py,sha256=6xULj0wzHzc8l6jV-v-fOtbeli8peBfvyeSLtioN0-0,1672
|
159
164
|
airbyte_cdk/sources/declarative/requesters/request_option.py,sha256=Bl0gxGWudmwT3FXBozTN00WYle2jd6ry_S1YylCnwqM,4825
|
160
165
|
airbyte_cdk/sources/declarative/requesters/request_options/__init__.py,sha256=WCwpKqM4wKqy-DHJaCHbKAlFqRVOqMi9K5qonxIfi_Y,809
|
161
166
|
airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py,sha256=31nG6_0igidJFQon37-WeQkTpG3g2A5ZmlluI3ilZdE,3632
|
162
167
|
airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py,sha256=SRROdPJZ5kuqHLOlkh115pWP9nDGfDxRYPgH9oD3hPo,1798
|
163
168
|
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py,sha256=86YozYuBDfu0t9NbevIvQoGU0vqTP4rt3dRSTsHz3PA,2269
|
164
169
|
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py,sha256=rR00kE64U2yL0McU1gPr4_W5_sLUqwDgL3Nvj691nRU,2884
|
165
|
-
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py,sha256=
|
170
|
+
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py,sha256=dRlG1IyEOVzWFw7wm-8TBPn7JUtZw3jz6oAoH5yuuf0,6375
|
166
171
|
airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py,sha256=8YRiDzjYvqJ-aMmKFcjqzv_-e8OZ5QG_TbpZ-nuCu6s,2590
|
167
172
|
airbyte_cdk/sources/declarative/requesters/request_path.py,sha256=S3MeFvcaQrMbOkSY2W2VbXLNomqt_3eXqVd9ZhgNwUs,299
|
168
173
|
airbyte_cdk/sources/declarative/requesters/requester.py,sha256=OcDzuCBgD1owK_lBPG0KbRRHRn9kzbuRveU4HejDiv4,5116
|
@@ -173,7 +178,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
|
|
173
178
|
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=nQepwG_RfW53sgwvK5dLPqfCx0VjsQ83nYoPjBMAaLM,527
|
174
179
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=6oZtnCHm9NdDvjTSrVwPQOXGSdETSIR7eWH2vFjM7jI,4855
|
175
180
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
176
|
-
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
|
181
|
+
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=QjkKOvvuP62f9J4RcfKTSYCP69lJBndhJCTdeRXO0LQ,31090
|
177
182
|
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
|
178
183
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
|
179
184
|
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=J8Q_iJYhcSQLWyt0bTZCbDAGpxt9G8FCc6Q9jtGsNzw,10703
|
@@ -359,9 +364,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
359
364
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
360
365
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
361
366
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
362
|
-
airbyte_cdk-6.
|
363
|
-
airbyte_cdk-6.
|
364
|
-
airbyte_cdk-6.
|
365
|
-
airbyte_cdk-6.
|
366
|
-
airbyte_cdk-6.
|
367
|
-
airbyte_cdk-6.
|
367
|
+
airbyte_cdk-6.43.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
368
|
+
airbyte_cdk-6.43.0.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
369
|
+
airbyte_cdk-6.43.0.dev0.dist-info/METADATA,sha256=Gkcyw6S5UfIDRNpbltYuPPi5GdcRkjEv0DaPDpChh9g,6076
|
370
|
+
airbyte_cdk-6.43.0.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
371
|
+
airbyte_cdk-6.43.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
372
|
+
airbyte_cdk-6.43.0.dev0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|