airbyte-cdk 6.41.9.dev4101__py3-none-any.whl → 6.42.0__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
Files changed (30)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +25 -0
  2. airbyte_cdk/connector_builder/main.py +3 -0
  3. airbyte_cdk/models/__init__.py +0 -1
  4. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +0 -1
  5. airbyte_cdk/sources/declarative/async_job/job.py +6 -0
  6. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
  7. airbyte_cdk/sources/declarative/async_job/job_tracker.py +22 -6
  8. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -22
  9. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +71 -39
  10. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +2 -1
  11. airbyte_cdk/sources/declarative/manifest_declarative_source.py +17 -2
  12. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +48 -25
  13. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +45 -24
  14. airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
  15. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +150 -0
  16. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +5 -1
  17. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -17
  18. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  19. airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
  20. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +4 -0
  21. airbyte_cdk/sources/types.py +0 -11
  22. airbyte_cdk/sources/utils/record_helper.py +1 -8
  23. {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/METADATA +2 -2
  24. {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/RECORD +28 -29
  25. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -61
  26. airbyte_cdk/sources/utils/files_directory.py +0 -15
  27. {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/LICENSE.txt +0 -0
  28. {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/LICENSE_SHORT +0 -0
  29. {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/WHEEL +0 -0
  30. {airbyte_cdk-6.41.9.dev4101.dist-info → airbyte_cdk-6.42.0.dist-info}/entry_points.txt +0 -0
@@ -1890,9 +1890,10 @@ class DeclarativeSource1(BaseModel):
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
     api_budget: Optional[HTTPAPIBudget] = None
-    max_concurrent_async_job_count: Optional[int] = Field(
+    max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
         None,
         description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
+        examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
         title="Maximum Concurrent Asynchronous Jobs",
     )
     metadata: Optional[Dict[str, Any]] = Field(
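The change above (repeated below for DeclarativeSource2) widens max_concurrent_async_job_count from an integer to an integer or an interpolated string, so the limit can be read from the connector config at runtime. As a hedged illustration only (the helper below is not part of the CDK), such a value could be resolved with the CDK's InterpolatedString:

from typing import Any, Mapping, Union

from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString


def resolve_max_concurrent_async_job_count(
    value: Union[int, str], config: Mapping[str, Any]
) -> int:
    # Integers pass through; strings are treated as Jinja interpolations against the config.
    if isinstance(value, int):
        return value
    return int(InterpolatedString.create(value, parameters={}).eval(config))


resolve_max_concurrent_async_job_count(3, {})  # 3
resolve_max_concurrent_async_job_count(
    "{{ config['max_concurrent_async_job_count'] }}",
    {"max_concurrent_async_job_count": 5},
)  # 5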
@@ -1922,9 +1923,10 @@ class DeclarativeSource2(BaseModel):
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
     api_budget: Optional[HTTPAPIBudget] = None
-    max_concurrent_async_job_count: Optional[int] = Field(
+    max_concurrent_async_job_count: Optional[Union[int, str]] = Field(
         None,
         description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
+        examples=[3, "{{ config['max_concurrent_async_job_count'] }}"],
         title="Maximum Concurrent Asynchronous Jobs",
     )
     metadata: Optional[Dict[str, Any]] = Field(
@@ -2278,22 +2280,6 @@ class StateDelegatingStream(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class FileUploader(BaseModel):
-    type: Literal["FileUploader"]
-    requester: Union[CustomRequester, HttpRequester] = Field(
-        ...,
-        description="Requester component that describes how to prepare HTTP requests to send to the source API.",
-    )
-    download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
-        ...,
-        description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
-    )
-    file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
-        None,
-        description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
-    )
-
-
 class SimpleRetriever(BaseModel):
     type: Literal["SimpleRetriever"]
     record_selector: RecordSelector = Field(
@@ -2317,18 +2303,21 @@ class SimpleRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
+            GroupingPartitionRouter,
+            List[
+                Union[
+                    CustomPartitionRouter,
+                    ListPartitionRouter,
+                    SubstreamPartitionRouter,
+                    GroupingPartitionRouter,
+                ]
+            ],
         ]
     ] = Field(
         [],
         description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.",
         title="Partition Router",
     )
-    file_uploader: Optional[FileUploader] = Field(
-        None,
-        description="(experimental) Describes how to fetch a file",
-        title="File Uploader",
-    )
     decoder: Optional[
         Union[
             CustomDecoder,
@@ -2404,7 +2393,15 @@ class AsyncRetriever(BaseModel):
             CustomPartitionRouter,
             ListPartitionRouter,
             SubstreamPartitionRouter,
-            List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]],
+            GroupingPartitionRouter,
+            List[
+                Union[
+                    CustomPartitionRouter,
+                    ListPartitionRouter,
+                    SubstreamPartitionRouter,
+                    GroupingPartitionRouter,
+                ]
+            ],
         ]
     ] = Field(
         [],
@@ -2456,6 +2453,29 @@ class SubstreamPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+class GroupingPartitionRouter(BaseModel):
+    type: Literal["GroupingPartitionRouter"]
+    group_size: int = Field(
+        ...,
+        description="The number of partitions to include in each group. This determines how many partition values are batched together in a single slice.",
+        examples=[10, 50],
+        title="Group Size",
+    )
+    underlying_partition_router: Union[
+        CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter
+    ] = Field(
+        ...,
+        description="The partition router whose output will be grouped. This can be any valid partition router component.",
+        title="Underlying Partition Router",
+    )
+    deduplicate: Optional[bool] = Field(
+        True,
+        description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.",
+        title="Deduplicate Partitions",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class HttpComponentsResolver(BaseModel):
     type: Literal["HttpComponentsResolver"]
     retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
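The new GroupingPartitionRouter model above batches the slices of another partition router. A hedged sketch of a component definition it is meant to validate, written here as a Python dict; the cursor field name and values are invented for the example:

grouping_partition_router_definition = {
    "type": "GroupingPartitionRouter",
    "group_size": 10,
    "deduplicate": True,
    "underlying_partition_router": {
        "type": "ListPartitionRouter",
        "cursor_field": "board_ids",
        "values": ["1", "2", "3", "4", "5"],
    },
}

Note that request options on the underlying router are rejected by the factory (see create_grouping_partition_router later in this diff), so any request customization has to live on the stream's requester.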
@@ -2469,6 +2489,9 @@ class HttpComponentsResolver(BaseModel):

 class DynamicDeclarativeStream(BaseModel):
     type: Literal["DynamicDeclarativeStream"]
+    name: Optional[str] = Field(
+        "", description="The dynamic stream name.", example=["Tables"], title="Name"
+    )
     stream_template: DeclarativeStream = Field(
         ..., description="Reference to the stream template.", title="Stream Template"
     )
@@ -102,6 +102,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
 )
 from airbyte_cdk.sources.declarative.models import (
     CustomStateMigration,
+    GzipDecoder,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     AddedFieldDefinition as AddedFieldDefinitionModel,
@@ -220,15 +221,15 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    FileUploader as FileUploaderModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     FlattenFields as FlattenFieldsModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    GroupingPartitionRouter as GroupingPartitionRouterModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     GzipDecoder as GzipDecoderModel,
 )
@@ -387,6 +388,7 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
 )
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
+    GroupingPartitionRouter,
     ListPartitionRouter,
     PartitionRouter,
     SinglePartitionRouter,
@@ -444,7 +446,6 @@ from airbyte_cdk.sources.declarative.retrievers import (
     SimpleRetriever,
     SimpleRetrieverTestReadDecorator,
 )
-from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
 from airbyte_cdk.sources.declarative.schema import (
     ComplexFieldType,
     DefaultSchemaLoader,
@@ -636,12 +637,12 @@ class ModelToComponentFactory:
             ComponentMappingDefinitionModel: self.create_components_mapping_definition,
             ZipfileDecoderModel: self.create_zipfile_decoder,
             HTTPAPIBudgetModel: self.create_http_api_budget,
-            FileUploaderModel: self.create_file_uploader,
             FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
             MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
             UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
             RateModel: self.create_rate,
             HttpRequestRegexMatcherModel: self.create_http_request_matcher,
+            GroupingPartitionRouterModel: self.create_grouping_partition_router,
         }

         # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1359,6 +1360,9 @@
         )
         stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

+        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
+        use_global_cursor = isinstance(partition_router, GroupingPartitionRouter)
+
         # Return the concurrent cursor and state converter
         return ConcurrentPerPartitionCursor(
             cursor_factory=cursor_factory,
@@ -1370,6 +1374,7 @@
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
+            use_global_cursor=use_global_cursor,
         )

     @staticmethod
@@ -3077,8 +3082,11 @@
                 stream_slices,
                 self._job_tracker,
                 self._message_repository,
-                has_bulk_parent=False,
                 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
+                has_bulk_parent=False,
+                # set the `job_max_retry` to 1 for the `Connector Builder`` use-case.
+                # `None` == default retry is set to 3 attempts, under the hood.
+                job_max_retry=1 if self._emit_connector_builder_messages else None,
             ),
             stream_slicer=stream_slicer,
             config=config,
@@ -3322,24 +3330,6 @@
             matchers=matchers,
         )

-    def create_file_uploader(
-        self, model: FileUploaderModel, config: Config, **kwargs: Any
-    ) -> FileUploader:
-        name = "File Uploader"
-        requester = self._create_component_from_model(
-            model=model.requester,
-            config=config,
-            name=name,
-            **kwargs,
-        )
-        download_target_extractor = self._create_component_from_model(
-            model=model.download_target_extractor,
-            config=config,
-            name=name,
-            **kwargs,
-        )
-        return FileUploader(requester, download_target_extractor)
-
     def create_moving_window_call_rate_policy(
         self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
     ) -> MovingWindowCallRatePolicy:
@@ -3389,3 +3379,34 @@
         self._api_budget = self.create_component(
             model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
         )
+
+    def create_grouping_partition_router(
+        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
+    ) -> GroupingPartitionRouter:
+        underlying_router = self._create_component_from_model(
+            model=model.underlying_partition_router, config=config
+        )
+        if model.group_size < 1:
+            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
+
+        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
+        # because they are specific to individual partitions and cannot be aggregated or handled
+        # when grouping, potentially leading to incorrect API calls. Any request customization
+        # should be managed at the stream level through the requester's configuration.
+        if isinstance(underlying_router, SubstreamPartitionRouter):
+            if any(
+                parent_config.request_option
+                for parent_config in underlying_router.parent_stream_configs
+            ):
+                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
+
+        if isinstance(underlying_router, ListPartitionRouter):
+            if underlying_router.request_option:
+                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
+
+        return GroupingPartitionRouter(
+            group_size=model.group_size,
+            underlying_partition_router=underlying_router,
+            deduplicate=model.deduplicate if model.deduplicate is not None else True,
+            config=config,
+        )
@@ -8,6 +8,9 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_route
 from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
     CartesianProductStreamSlicer,
 )
+from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
+    GroupingPartitionRouter,
+)
 from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
     ListPartitionRouter,
 )
@@ -22,6 +25,7 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_route
 __all__ = [
     "AsyncJobPartitionRouter",
     "CartesianProductStreamSlicer",
+    "GroupingPartitionRouter",
     "ListPartitionRouter",
     "SinglePartitionRouter",
     "SubstreamPartitionRouter",
@@ -0,0 +1,150 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from dataclasses import dataclass
+from typing import Any, Iterable, Mapping, Optional
+
+from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+@dataclass
+class GroupingPartitionRouter(PartitionRouter):
+    """
+    A partition router that groups partitions from an underlying partition router into batches of a specified size.
+    This is useful for APIs that support filtering by multiple partition keys in a single request.
+
+    Attributes:
+        group_size (int): The number of partitions to include in each group.
+        underlying_partition_router (PartitionRouter): The partition router whose output will be grouped.
+        deduplicate (bool): If True, ensures unique partitions within each group by removing duplicates based on the partition key.
+        config (Config): The connector configuration.
+        parameters (Mapping[str, Any]): Additional parameters for interpolation and configuration.
+    """
+
+    group_size: int
+    underlying_partition_router: PartitionRouter
+    config: Config
+    deduplicate: bool = True
+
+    def __post_init__(self) -> None:
+        self._state: Optional[Mapping[str, StreamState]] = {}
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        """
+        Lazily groups partitions from the underlying partition router into batches of size `group_size`.
+
+        This method processes partitions one at a time from the underlying router, maintaining a batch buffer.
+        When the buffer reaches `group_size` or the underlying router is exhausted, it yields a grouped slice.
+        If deduplication is enabled, it tracks seen partition keys to ensure uniqueness within the current batch.
+
+        Yields:
+            Iterable[StreamSlice]: An iterable of StreamSlice objects, where each slice contains a batch of partition values.
+        """
+        batch = []
+        seen_keys = set()
+
+        # Iterate over partitions lazily from the underlying router
+        for partition in self.underlying_partition_router.stream_slices():
+            # Extract the partition key (assuming single key-value pair, e.g., {"board_ids": value})
+            partition_keys = list(partition.partition.keys())
+            # skip parent_slice as it is part of SubstreamPartitionRouter partition
+            if "parent_slice" in partition_keys:
+                partition_keys.remove("parent_slice")
+            if len(partition_keys) != 1:
+                raise ValueError(
+                    f"GroupingPartitionRouter expects a single partition key-value pair. Got {partition.partition}"
+                )
+            key = partition.partition[partition_keys[0]]
+
+            # Skip duplicates if deduplication is enabled
+            if self.deduplicate and key in seen_keys:
+                continue
+
+            # Add partition to the batch
+            batch.append(partition)
+            if self.deduplicate:
+                seen_keys.add(key)
+
+            # Yield the batch when it reaches the group_size
+            if len(batch) == self.group_size:
+                self._state = self.underlying_partition_router.get_stream_state()
+                yield self._create_grouped_slice(batch)
+                batch = []  # Reset the batch
+
+        self._state = self.underlying_partition_router.get_stream_state()
+        # Yield any remaining partitions if the batch isn't empty
+        if batch:
+            yield self._create_grouped_slice(batch)
+
+    def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice:
+        """
+        Creates a grouped StreamSlice from a batch of partitions, aggregating extra fields into a dictionary with list values.
+
+        Args:
+            batch (list[StreamSlice]): A list of StreamSlice objects to group.
+
+        Returns:
+            StreamSlice: A single StreamSlice with combined partition and extra field values.
+        """
+        # Combine partition values into a single dict with lists
+        grouped_partition = {
+            key: [p.partition.get(key) for p in batch] for key in batch[0].partition.keys()
+        }

+        # Aggregate extra fields into a dict with list values
+        extra_fields_dict = (
+            {
+                key: [p.extra_fields.get(key) for p in batch]
+                for key in set().union(*(p.extra_fields.keys() for p in batch if p.extra_fields))
+            }
+            if any(p.extra_fields for p in batch)
+            else {}
+        )
+        return StreamSlice(
+            partition=grouped_partition,
+            cursor_slice={},  # Cursor is managed by the underlying router or incremental sync
+            extra_fields=extra_fields_dict,
+        )
+
+    def get_request_params(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_headers(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_body_data(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def get_request_body_json(
+        self,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> Mapping[str, Any]:
+        return {}
+
+    def set_initial_state(self, stream_state: StreamState) -> None:
+        """Delegate state initialization to the underlying partition router."""
+        self.underlying_partition_router.set_initial_state(stream_state)
+        self._state = self.underlying_partition_router.get_stream_state()
+
+    def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
+        """Delegate state retrieval to the underlying partition router."""
+        return self._state
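To make the batching behaviour of the new file concrete, here is a minimal sketch (not part of the package) that wraps a ListPartitionRouter; constructor arguments follow the dataclass fields shown above but may drift between CDK versions:

from airbyte_cdk.sources.declarative.partition_routers import (
    GroupingPartitionRouter,
    ListPartitionRouter,
)

config: dict = {}
underlying = ListPartitionRouter(
    values=["1", "2", "3", "4", "5"],
    cursor_field="board_id",
    config=config,
    parameters={},
)
router = GroupingPartitionRouter(
    group_size=2,
    underlying_partition_router=underlying,
    config=config,
    deduplicate=True,
)

for grouped_slice in router.stream_slices():
    # Given the implementation above, this prints:
    #   {'board_id': ['1', '2']}
    #   {'board_id': ['3', '4']}
    #   {'board_id': ['5']}
    print(grouped_slice.partition)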
@@ -374,7 +374,11 @@ class SubstreamPartitionRouter(PartitionRouter):
         # Ignore per-partition states or invalid formats.
         if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
             # If a global state is present under the key "state", use its first value.
-            if "state" in stream_state and isinstance(stream_state["state"], dict):
+            if (
+                "state" in stream_state
+                and isinstance(stream_state["state"], dict)
+                and stream_state["state"] != {}
+            ):
                 substream_state = list(stream_state["state"].values())[0]
             else:
                 return {}
@@ -3,7 +3,6 @@
 from typing import Any, Iterable, Mapping, Optional

 from airbyte_cdk.sources.declarative.retrievers import Retriever
-from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -19,7 +18,6 @@ class DeclarativePartitionFactory:
         json_schema: Mapping[str, Any],
         retriever: Retriever,
         message_repository: MessageRepository,
-        file_uploader: Optional[FileUploader] = None,
     ) -> None:
         """
         The DeclarativePartitionFactory takes a retriever_factory and not a retriever directly. The reason is that our components are not
@@ -30,7 +28,6 @@ class DeclarativePartitionFactory:
         self._json_schema = json_schema
         self._retriever = retriever
         self._message_repository = message_repository
-        self._file_uploader = file_uploader

     def create(self, stream_slice: StreamSlice) -> Partition:
         return DeclarativePartition(
@@ -38,7 +35,6 @@ class DeclarativePartitionFactory:
             self._json_schema,
             self._retriever,
             self._message_repository,
-            self._file_uploader,
             stream_slice,
         )

@@ -50,32 +46,23 @@ class DeclarativePartition(Partition):
         json_schema: Mapping[str, Any],
         retriever: Retriever,
         message_repository: MessageRepository,
-        file_uploader: Optional[FileUploader],
         stream_slice: StreamSlice,
     ):
         self._stream_name = stream_name
         self._json_schema = json_schema
         self._retriever = retriever
         self._message_repository = message_repository
-        self._file_uploader = file_uploader
         self._stream_slice = stream_slice
         self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)

     def read(self) -> Iterable[Record]:
         for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
             if isinstance(stream_data, Mapping):
-                record = (
-                    stream_data
-                    if isinstance(stream_data, Record)
-                    else Record(
-                        data=stream_data,
-                        stream_name=self.stream_name(),
-                        associated_slice=self._stream_slice,
-                    )
+                yield Record(
+                    data=stream_data,
+                    stream_name=self.stream_name(),
+                    associated_slice=self._stream_slice,
                 )
-                if self._file_uploader:
-                    self._file_uploader.upload(record)
-                yield record
             else:
                 self._message_repository.emit_message(stream_data)
@@ -8,12 +8,18 @@ from typing import Any, Dict, Iterable
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
-from airbyte_cdk.sources.utils.files_directory import get_files_directory
+
+AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
+DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"


 class FileTransfer:
     def __init__(self) -> None:
-        self._local_directory = get_files_directory()
+        self._local_directory = (
+            AIRBYTE_STAGING_DIRECTORY
+            if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
+            else DEFAULT_LOCAL_DIRECTORY
+        )

     def get_file(
         self,
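The inlined selection above replaces the deleted airbyte_cdk/sources/utils/files_directory.py helper: the transfer directory is the AIRBYTE_STAGING_DIRECTORY path (default /staging/files) when it exists on the filesystem, otherwise /tmp/airbyte-file-transfer. The same fallback, as a standalone sketch:

import os

AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"

# Prefer the staging directory when it is present on the filesystem.
local_directory = (
    AIRBYTE_STAGING_DIRECTORY
    if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
    else DEFAULT_LOCAL_DIRECTORY
)
print(local_directory)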
@@ -29,7 +29,6 @@ class DefaultStream(AbstractStream):
         logger: Logger,
         cursor: Cursor,
         namespace: Optional[str] = None,
-        supports_file_transfer: bool = False,
     ) -> None:
         self._stream_partition_generator = partition_generator
         self._name = name
@@ -40,7 +39,6 @@ class DefaultStream(AbstractStream):
         self._logger = logger
         self._cursor = cursor
         self._namespace = namespace
-        self._supports_file_transfer = supports_file_transfer

     def generate_partitions(self) -> Iterable[Partition]:
         yield from self._stream_partition_generator.generate()
@@ -70,7 +68,6 @@ class DefaultStream(AbstractStream):
             json_schema=dict(self._json_schema),
             supported_sync_modes=[SyncMode.full_refresh],
             is_resumable=False,
-            is_file_based=self._supports_file_transfer,
         )

         if self._namespace:
@@ -71,6 +71,10 @@ class AbstractStreamStateConverter(ABC):
         for stream_slice in state.get("slices", []):
             stream_slice[self.START_KEY] = self._from_state_message(stream_slice[self.START_KEY])
             stream_slice[self.END_KEY] = self._from_state_message(stream_slice[self.END_KEY])
+            if self.MOST_RECENT_RECORD_KEY in stream_slice:
+                stream_slice[self.MOST_RECENT_RECORD_KEY] = self._from_state_message(
+                    stream_slice[self.MOST_RECENT_RECORD_KEY]
+                )
         return state

     def serialize(
@@ -6,7 +6,6 @@ from __future__ import annotations

 from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView

-from airbyte_cdk.models import AirbyteRecordMessageFileReference
 from airbyte_cdk.utils.slice_hasher import SliceHasher

 # A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
@@ -25,13 +24,11 @@ class Record(Mapping[str, Any]):
         stream_name: str,
         associated_slice: Optional[StreamSlice] = None,
         is_file_transfer_message: bool = False,
-        file_reference: Optional[AirbyteRecordMessageFileReference] = None,
     ):
         self._data = data
         self._associated_slice = associated_slice
         self.stream_name = stream_name
         self.is_file_transfer_message = is_file_transfer_message
-        self._file_reference = file_reference

     @property
     def data(self) -> Mapping[str, Any]:
@@ -41,14 +38,6 @@ class Record(Mapping[str, Any]):
     def associated_slice(self) -> Optional[StreamSlice]:
         return self._associated_slice

-    @property
-    def file_reference(self) -> AirbyteRecordMessageFileReference:
-        return self._file_reference
-
-    @file_reference.setter
-    def file_reference(self, value: AirbyteRecordMessageFileReference):
-        self._file_reference = value
-
     def __repr__(self) -> str:
         return repr(self._data)
@@ -9,7 +9,6 @@ from airbyte_cdk.models import (
     AirbyteLogMessage,
     AirbyteMessage,
     AirbyteRecordMessage,
-    AirbyteRecordMessageFileReference,
     AirbyteTraceMessage,
 )
 from airbyte_cdk.models import Type as MessageType
@@ -24,7 +23,6 @@ def stream_data_to_airbyte_message(
     transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform),
     schema: Optional[Mapping[str, Any]] = None,
     is_file_transfer_message: bool = False,
-    file_reference: Optional[AirbyteRecordMessageFileReference] = None,
 ) -> AirbyteMessage:
     if schema is None:
         schema = {}
@@ -43,12 +41,7 @@ def stream_data_to_airbyte_message(
                     stream=stream_name, file=data, emitted_at=now_millis, data={}
                 )
             else:
-                message = AirbyteRecordMessage(
-                    stream=stream_name,
-                    data=data,
-                    emitted_at=now_millis,
-                    file_reference=file_reference,
-                )
+                message = AirbyteRecordMessage(stream=stream_name, data=data, emitted_at=now_millis)
             return AirbyteMessage(type=MessageType.RECORD, record=message)
         case AirbyteTraceMessage():
             return AirbyteMessage(type=MessageType.TRACE, trace=data_or_message)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 6.41.9.dev4101
+Version: 6.42.0
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -22,7 +22,7 @@ Provides-Extra: sql
 Provides-Extra: vector-db-based
 Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: airbyte-protocol-models-dataclasses (==0.14.1337.dev1742858109)
+Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
 Requires-Dist: anyascii (>=0.3.2,<0.4.0)
 Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
 Requires-Dist: backoff