airbyte-cdk 6.26.0.dev3__py3-none-any.whl → 6.26.0.dev4100__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -1800,19 +1800,6 @@ definitions:
1800
1800
  $parameters:
1801
1801
  type: object
1802
1802
  additionalProperties: true
1803
- ComplexFieldType:
1804
- title: Schema Field Type
1805
- description: (This component is experimental. Use at your own risk.) Represents a complex field type.
1806
- type: object
1807
- required:
1808
- - field_type
1809
- properties:
1810
- field_type:
1811
- type: string
1812
- items:
1813
- anyOf:
1814
- - type: string
1815
- - "$ref": "#/definitions/ComplexFieldType"
1816
1803
  TypesMap:
1817
1804
  title: Types Map
1818
1805
  description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
@@ -1827,7 +1814,6 @@ definitions:
1827
1814
  - type: array
1828
1815
  items:
1829
1816
  type: string
1830
- - "$ref": "#/definitions/ComplexFieldType"
1831
1817
  current_type:
1832
1818
  anyOf:
1833
1819
  - type: string
@@ -147,7 +147,7 @@ class ConcurrentPerPartitionCursor(Cursor):
147
147
  < cursor.state[self.cursor_field.cursor_field_key]
148
148
  ):
149
149
  self._new_global_cursor = copy.deepcopy(cursor.state)
150
- self._emit_state_message()
150
+ self._emit_state_message()
151
151
 
152
152
  def ensure_at_least_one_state_emitted(self) -> None:
153
153
  """
@@ -192,8 +192,7 @@ class ConcurrentPerPartitionCursor(Cursor):
192
192
  self._global_cursor,
193
193
  self._lookback_window if self._global_cursor else 0,
194
194
  )
195
- with self._lock:
196
- self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
195
+ self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
197
196
  self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
198
197
  threading.Semaphore(0)
199
198
  )
@@ -211,38 +210,16 @@ class ConcurrentPerPartitionCursor(Cursor):
211
210
 
212
211
  def _ensure_partition_limit(self) -> None:
213
212
  """
214
- Ensure the maximum number of partitions does not exceed the predefined limit.
215
-
216
- Steps:
217
- 1. Attempt to remove partitions that are marked as finished in `_finished_partitions`.
218
- These partitions are considered processed and safe to delete.
219
- 2. If the limit is still exceeded and no finished partitions are available for removal,
220
- remove the oldest partition unconditionally. We expect failed partitions to be removed.
221
-
222
- Logging:
223
- - Logs a warning each time a partition is removed, indicating whether it was finished
224
- or removed due to being the oldest.
213
+ Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
225
214
  """
226
- with self._lock:
227
- while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
228
- # Try removing finished partitions first
229
- for partition_key in list(self._cursor_per_partition.keys()):
230
- if partition_key in self._finished_partitions:
231
- oldest_partition = self._cursor_per_partition.pop(
232
- partition_key
233
- ) # Remove the oldest partition
234
- logger.warning(
235
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
236
- )
237
- break
238
- else:
239
- # If no finished partitions can be removed, fall back to removing the oldest partition
240
- oldest_partition = self._cursor_per_partition.popitem(last=False)[
241
- 1
242
- ] # Remove the oldest partition
243
- logger.warning(
244
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
245
- )
215
+ while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
216
+ self._over_limit += 1
217
+ oldest_partition = self._cursor_per_partition.popitem(last=False)[
218
+ 0
219
+ ] # Remove the oldest partition
220
+ logger.warning(
221
+ f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
222
+ )
246
223
 
247
224
  def _set_initial_state(self, stream_state: StreamState) -> None:
248
225
  """
@@ -287,10 +264,7 @@ class ConcurrentPerPartitionCursor(Cursor):
287
264
  if not stream_state:
288
265
  return
289
266
 
290
- if (
291
- self._PERPARTITION_STATE_KEY not in stream_state
292
- and self._GLOBAL_STATE_KEY not in stream_state
293
- ):
267
+ if self._PERPARTITION_STATE_KEY not in stream_state:
294
268
  # We assume that `stream_state` is in a global format that can be applied to all partitions.
295
269
  # Example: {"global_state_format_key": "global_state_format_value"}
296
270
  self._global_cursor = deepcopy(stream_state)
@@ -299,7 +273,7 @@ class ConcurrentPerPartitionCursor(Cursor):
299
273
  else:
300
274
  self._lookback_window = int(stream_state.get("lookback_window", 0))
301
275
 
302
- for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
276
+ for state in stream_state[self._PERPARTITION_STATE_KEY]:
303
277
  self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
304
278
  self._create_cursor(state["cursor"])
305
279
  )
@@ -736,13 +736,8 @@ class HttpResponseFilter(BaseModel):
736
736
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
737
737
 
738
738
 
739
- class ComplexFieldType(BaseModel):
740
- field_type: str
741
- items: Optional[Union[str, ComplexFieldType]] = None
742
-
743
-
744
739
  class TypesMap(BaseModel):
745
- target_type: Union[str, List[str], ComplexFieldType]
740
+ target_type: Union[str, List[str]]
746
741
  current_type: Union[str, List[str]]
747
742
  condition: Optional[str] = None
748
743
 
@@ -2265,7 +2260,6 @@ class DynamicDeclarativeStream(BaseModel):
2265
2260
  )
2266
2261
 
2267
2262
 
2268
- ComplexFieldType.update_forward_refs()
2269
2263
  CompositeErrorHandler.update_forward_refs()
2270
2264
  DeclarativeSource1.update_forward_refs()
2271
2265
  DeclarativeSource2.update_forward_refs()
@@ -133,9 +133,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
133
133
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
134
134
  CheckStream as CheckStreamModel,
135
135
  )
136
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
137
- ComplexFieldType as ComplexFieldTypeModel,
138
- )
139
136
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
140
137
  ComponentMappingDefinition as ComponentMappingDefinitionModel,
141
138
  )
@@ -432,7 +429,6 @@ from airbyte_cdk.sources.declarative.retrievers import (
432
429
  SimpleRetrieverTestReadDecorator,
433
430
  )
434
431
  from airbyte_cdk.sources.declarative.schema import (
435
- ComplexFieldType,
436
432
  DefaultSchemaLoader,
437
433
  DynamicSchemaLoader,
438
434
  InlineSchemaLoader,
@@ -576,7 +572,6 @@ class ModelToComponentFactory:
576
572
  DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
577
573
  SchemaTypeIdentifierModel: self.create_schema_type_identifier,
578
574
  TypesMapModel: self.create_types_map,
579
- ComplexFieldTypeModel: self.create_complex_field_type,
580
575
  JwtAuthenticatorModel: self.create_jwt_authenticator,
581
576
  LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
582
577
  ListPartitionRouterModel: self.create_list_partition_router,
@@ -1899,26 +1894,10 @@ class ModelToComponentFactory:
1899
1894
  ) -> InlineSchemaLoader:
1900
1895
  return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
1901
1896
 
1902
- def create_complex_field_type(
1903
- self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
1904
- ) -> ComplexFieldType:
1905
- items = (
1906
- self._create_component_from_model(model=model.items, config=config)
1907
- if isinstance(model.items, ComplexFieldTypeModel)
1908
- else model.items
1909
- )
1910
-
1911
- return ComplexFieldType(field_type=model.field_type, items=items)
1912
-
1913
- def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
1914
- target_type = (
1915
- self._create_component_from_model(model=model.target_type, config=config)
1916
- if isinstance(model.target_type, ComplexFieldTypeModel)
1917
- else model.target_type
1918
- )
1919
-
1897
+ @staticmethod
1898
+ def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
1920
1899
  return TypesMap(
1921
- target_type=target_type,
1900
+ target_type=model.target_type,
1922
1901
  current_type=model.current_type,
1923
1902
  condition=model.condition if model.condition is not None else "True",
1924
1903
  )
@@ -295,58 +295,24 @@ class SubstreamPartitionRouter(PartitionRouter):
295
295
  return
296
296
 
297
297
  if not parent_state and incremental_dependency:
298
- # Migrate child state to parent state format
299
- parent_state = self._migrate_child_state_to_parent_state(stream_state)
298
+ # Attempt to retrieve child state
299
+ substream_state = list(stream_state.values())
300
+ substream_state = substream_state[0] if substream_state else {} # type: ignore [assignment] # Incorrect type for assignment
301
+ parent_state = {}
302
+
303
+ # Copy child state to parent streams with incremental dependencies
304
+ if substream_state:
305
+ for parent_config in self.parent_stream_configs:
306
+ if parent_config.incremental_dependency:
307
+ parent_state[parent_config.stream.name] = {
308
+ parent_config.stream.cursor_field: substream_state
309
+ }
300
310
 
301
311
  # Set state for each parent stream with an incremental dependency
302
312
  for parent_config in self.parent_stream_configs:
303
313
  if parent_config.incremental_dependency:
304
314
  parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
305
315
 
306
- def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
307
- """
308
- Migrate the child stream state to the parent stream's state format.
309
-
310
- This method converts the global or child state into a format compatible with parent
311
- streams. The migration occurs only for parent streams with incremental dependencies.
312
- The method filters out per-partition states and retains only the global state in the
313
- format `{cursor_field: cursor_value}`.
314
-
315
- Args:
316
- stream_state (StreamState): The state to migrate. Expected formats include:
317
- - {"updated_at": "2023-05-27T00:00:00Z"}
318
- - {"states": [...] } (ignored during migration)
319
-
320
- Returns:
321
- StreamState: A migrated state for parent streams in the format:
322
- {
323
- "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
324
- }
325
-
326
- Example:
327
- Input: {"updated_at": "2023-05-27T00:00:00Z"}
328
- Output: {
329
- "parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
330
- }
331
- """
332
- substream_state_values = list(stream_state.values())
333
- substream_state = substream_state_values[0] if substream_state_values else {}
334
-
335
- # Ignore per-partition states or invalid formats
336
- if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
337
- return {}
338
-
339
- # Copy child state to parent streams with incremental dependencies
340
- parent_state = {}
341
- if substream_state:
342
- for parent_config in self.parent_stream_configs:
343
- if parent_config.incremental_dependency:
344
- parent_state[parent_config.stream.name] = {
345
- parent_config.stream.cursor_field: substream_state
346
- }
347
-
348
- return parent_state
349
-
350
316
  def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
351
317
  """
352
318
  Get the state of the parent streams.
@@ -4,7 +4,6 @@
4
4
 
5
5
  from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
6
6
  from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
7
- ComplexFieldType,
8
7
  DynamicSchemaLoader,
9
8
  SchemaTypeIdentifier,
10
9
  TypesMap,
@@ -19,7 +18,6 @@ __all__ = [
19
18
  "SchemaLoader",
20
19
  "InlineSchemaLoader",
21
20
  "DynamicSchemaLoader",
22
- "ComplexFieldType",
23
21
  "TypesMap",
24
22
  "SchemaTypeIdentifier",
25
23
  ]
@@ -18,7 +18,7 @@ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
18
18
  from airbyte_cdk.sources.source import ExperimentalClassWarning
19
19
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
20
20
 
21
- AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
21
+ AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
22
22
  "string": {"type": ["null", "string"]},
23
23
  "boolean": {"type": ["null", "boolean"]},
24
24
  "date": {"type": ["null", "string"], "format": "date"},
@@ -45,25 +45,6 @@ AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
45
45
  }
46
46
 
47
47
 
48
- @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
49
- @dataclass(frozen=True)
50
- class ComplexFieldType:
51
- """
52
- Identifies complex field type
53
- """
54
-
55
- field_type: str
56
- items: Optional[Union[str, "ComplexFieldType"]] = None
57
-
58
- def __post_init__(self) -> None:
59
- """
60
- Enforces that `items` is only used when `field_type` is a array
61
- """
62
- # `items_type` is valid only for array target types
63
- if self.items and self.field_type != "array":
64
- raise ValueError("'items' can only be used when 'field_type' is an array.")
65
-
66
-
67
48
  @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
68
49
  @dataclass(frozen=True)
69
50
  class TypesMap:
@@ -71,7 +52,7 @@ class TypesMap:
71
52
  Represents a mapping between a current type and its corresponding target type.
72
53
  """
73
54
 
74
- target_type: Union[List[str], str, ComplexFieldType]
55
+ target_type: Union[List[str], str]
75
56
  current_type: Union[List[str], str]
76
57
  condition: Optional[str]
77
58
 
@@ -154,9 +135,8 @@ class DynamicSchemaLoader(SchemaLoader):
154
135
  transformed_properties = self._transform(properties, {})
155
136
 
156
137
  return {
157
- "$schema": "https://json-schema.org/draft-07/schema#",
138
+ "$schema": "http://json-schema.org/draft-07/schema#",
158
139
  "type": "object",
159
- "additionalProperties": True,
160
140
  "properties": transformed_properties,
161
141
  }
162
142
 
@@ -208,36 +188,18 @@ class DynamicSchemaLoader(SchemaLoader):
208
188
  first_type = self._get_airbyte_type(mapped_field_type[0])
209
189
  second_type = self._get_airbyte_type(mapped_field_type[1])
210
190
  return {"oneOf": [first_type, second_type]}
211
-
212
191
  elif isinstance(mapped_field_type, str):
213
192
  return self._get_airbyte_type(mapped_field_type)
214
-
215
- elif isinstance(mapped_field_type, ComplexFieldType):
216
- return self._resolve_complex_type(mapped_field_type)
217
-
218
193
  else:
219
194
  raise ValueError(
220
195
  f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
221
196
  )
222
197
 
223
- def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
224
- if not complex_type.items:
225
- return self._get_airbyte_type(complex_type.field_type)
226
-
227
- field_type = self._get_airbyte_type(complex_type.field_type)
228
- field_type["items"] = (
229
- self._get_airbyte_type(complex_type.items)
230
- if isinstance(complex_type.items, str)
231
- else self._resolve_complex_type(complex_type.items)
232
- )
233
-
234
- return field_type
235
-
236
198
  def _replace_type_if_not_valid(
237
199
  self,
238
200
  field_type: Union[List[str], str],
239
201
  raw_schema: MutableMapping[str, Any],
240
- ) -> Union[List[str], str, ComplexFieldType]:
202
+ ) -> Union[List[str], str]:
241
203
  """
242
204
  Replaces a field type if it matches a type mapping in `types_map`.
243
205
  """
@@ -254,7 +216,7 @@ class DynamicSchemaLoader(SchemaLoader):
254
216
  return field_type
255
217
 
256
218
  @staticmethod
257
- def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
219
+ def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
258
220
  """
259
221
  Maps a field type to its corresponding Airbyte type definition.
260
222
  """
@@ -22,6 +22,13 @@ class DeliverRecords(BaseModel):
22
22
 
23
23
  delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)
24
24
 
25
+ sync_metadata: bool = Field(
26
+ title="Make stream sync files metadata",
27
+ description="If enabled, streams will sync files metadata instead of files data.",
28
+ default=False,
29
+ airbyte_hidden=True,
30
+ )
31
+
25
32
 
26
33
  class DeliverRawFiles(BaseModel):
27
34
  class Config(OneOfOptionConfig):
@@ -312,6 +312,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
312
312
  cursor=cursor,
313
313
  use_file_transfer=self._use_file_transfer(parsed_config),
314
314
  preserve_directory_structure=self._preserve_directory_structure(parsed_config),
315
+ sync_metadata=self._sync_metadata(parsed_config),
315
316
  )
316
317
 
317
318
  def _get_stream_from_catalog(
@@ -387,6 +388,14 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
387
388
  )
388
389
  return use_file_transfer
389
390
 
391
+ @staticmethod
392
+ def _use_records_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
393
+ use_records_transfer = (
394
+ hasattr(parsed_config.delivery_method, "delivery_type")
395
+ and parsed_config.delivery_method.delivery_type == "use_records_transfer"
396
+ )
397
+ return use_records_transfer
398
+
390
399
  @staticmethod
391
400
  def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
392
401
  """
@@ -408,3 +417,13 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
408
417
  ):
409
418
  return parsed_config.delivery_method.preserve_directory_structure
410
419
  return True
420
+
421
+ @staticmethod
422
+ def _sync_metadata(parsed_config: AbstractFileBasedSpec) -> bool:
423
+ if (
424
+ FileBasedSource._use_records_transfer(parsed_config)
425
+ and hasattr(parsed_config.delivery_method, "sync_metadata")
426
+ and parsed_config.delivery_method.sync_metadata is not None
427
+ ):
428
+ return parsed_config.delivery_method.sync_metadata
429
+ return False
@@ -135,6 +135,15 @@ class AbstractFileBasedStreamReader(ABC):
135
135
  return use_file_transfer
136
136
  return False
137
137
 
138
+ def use_records_transfer(self) -> bool:
139
+ if self.config:
140
+ use_records_transfer = (
141
+ hasattr(self.config.delivery_method, "delivery_type")
142
+ and self.config.delivery_method.delivery_type == "use_records_transfer"
143
+ )
144
+ return use_records_transfer
145
+ return False
146
+
138
147
  def preserve_directory_structure(self) -> bool:
139
148
  # fall back to preserve subdirectories if config is not present or incomplete
140
149
  if (
@@ -146,6 +155,16 @@ class AbstractFileBasedStreamReader(ABC):
146
155
  return self.config.delivery_method.preserve_directory_structure
147
156
  return True
148
157
 
158
+ def sync_metadata(self) -> bool:
159
+ if (
160
+ self.config
161
+ and self.use_records_transfer()
162
+ and hasattr(self.config.delivery_method, "sync_metadata")
163
+ and self.config.delivery_method.sync_metadata is not None
164
+ ):
165
+ return self.config.delivery_method.sync_metadata
166
+ return False
167
+
149
168
  @abstractmethod
150
169
  def get_file(
151
170
  self, file: RemoteFile, local_directory: str, logger: logging.Logger
@@ -183,3 +202,26 @@ class AbstractFileBasedStreamReader(ABC):
183
202
  makedirs(path.dirname(local_file_path), exist_ok=True)
184
203
  absolute_file_path = path.abspath(local_file_path)
185
204
  return [file_relative_path, local_file_path, absolute_file_path]
205
+
206
+ def get_file_metadata(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
207
+ """
208
+ This is required for connectors that will support syncing
209
+ metadata from files.
210
+ """
211
+ return {}
212
+
213
+ def get_metadata_schema(self) -> Dict[str, Any]:
214
+ """ "
215
+ Base schema to emit metadata records for a file,
216
+ override in stream reader implementation if the requirements
217
+ are different.
218
+ """
219
+ return {
220
+ "type": "object",
221
+ "properties": {
222
+ "id": {"type": "string"},
223
+ "file_path": {"type": "string"},
224
+ "allowed_identity_remote_ids": {"type": "array", "items": "string"},
225
+ "is_public": {"type": "boolean"},
226
+ },
227
+ }
@@ -47,6 +47,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
47
47
 
48
48
  FILE_TRANSFER_KW = "use_file_transfer"
49
49
  PRESERVE_DIRECTORY_STRUCTURE_KW = "preserve_directory_structure"
50
+ SYNC_METADATA_KW = "sync_metadata"
50
51
  FILES_KEY = "files"
51
52
  DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
52
53
  ab_last_mod_col = "_ab_source_file_last_modified"
@@ -56,6 +57,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
56
57
  airbyte_columns = [ab_last_mod_col, ab_file_name_col]
57
58
  use_file_transfer = False
58
59
  preserve_directory_structure = True
60
+ sync_metadata = False
59
61
 
60
62
  def __init__(self, **kwargs: Any):
61
63
  if self.FILE_TRANSFER_KW in kwargs:
@@ -64,6 +66,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
64
66
  self.preserve_directory_structure = kwargs.pop(
65
67
  self.PRESERVE_DIRECTORY_STRUCTURE_KW, True
66
68
  )
69
+ if self.SYNC_METADATA_KW in kwargs:
70
+ self.sync_metadata = kwargs.pop(self.SYNC_METADATA_KW, False)
67
71
  super().__init__(**kwargs)
68
72
 
69
73
  @property
@@ -105,6 +109,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
105
109
  self.ab_file_name_col: {"type": "string"},
106
110
  },
107
111
  }
112
+ elif self.sync_metadata:
113
+ return self.stream_reader.get_metadata_schema()
108
114
  else:
109
115
  return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
110
116
 
@@ -187,6 +193,26 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
187
193
  yield stream_data_to_airbyte_message(
188
194
  self.name, record, is_file_transfer_message=True
189
195
  )
196
+ elif self.sync_metadata:
197
+ try:
198
+ metadata_record = self.stream_reader.get_file_metadata(
199
+ file, logger=self.logger
200
+ )
201
+ yield stream_data_to_airbyte_message(
202
+ self.name, metadata_record, is_file_transfer_message=False
203
+ )
204
+ except Exception as e:
205
+ self.logger.error(
206
+ f"Failed to retrieve metadata for file {file.uri}: {str(e)}"
207
+ )
208
+ yield AirbyteMessage(
209
+ type=MessageType.LOG,
210
+ log=AirbyteLogMessage(
211
+ level=Level.ERROR,
212
+ message=f"Error retrieving metadata: stream={self.name} file={file.uri}",
213
+ stack_trace=traceback.format_exc(),
214
+ ),
215
+ )
190
216
  else:
191
217
  for record in parser.parse_records(
192
218
  self.config, file, self.stream_reader, self.logger, schema
@@ -284,6 +310,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
284
310
  def _get_raw_json_schema(self) -> JsonSchema:
285
311
  if self.use_file_transfer:
286
312
  return file_transfer_schema
313
+ elif self.sync_metadata:
314
+ return self.stream_reader.get_metadata_schema()
287
315
  elif self.config.input_schema:
288
316
  return self.config.get_input_schema() # type: ignore
289
317
  elif self.config.schemaless:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.26.0.dev3
3
+ Version: 6.26.0.dev4100
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -67,7 +67,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=wbfk5udu
67
67
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
68
68
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
69
69
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
70
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=yHOfjvrxDVnQmMi-mrdM27Y0Uqk4fYMmp9Rwdbq6-7s,139662
70
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=40Ts1-r0UnF3AhAj9pXE2pf6Y8WBqRAksjTaBiCuxq0,139243
71
71
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
72
72
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
73
73
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=KSpQetKGqPCv-38QgcVJ5kzM5nzbFldTSsYDCS3Xf0Y,1035
@@ -88,7 +88,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD
88
88
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
89
89
  airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
90
90
  airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
91
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=4Z8qZ5DccF0fw163KR5fWW83O-3-84AlaZKPajZ0ZZI,15945
91
+ airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=1_5XVLJdJXMAA0gJbWt4pqD0xGgyBNSZ06JHCgpvo2c,14501
92
92
  airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
93
93
  airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
94
94
  airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=9HO-QbL9akvjq2NP7l498RwLA4iQZlBMQW1tZbt34I8,15943
@@ -109,20 +109,20 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
109
109
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
110
110
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
111
111
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
112
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=68JPw6bLHnTh7zGN3CC8B6b9NI4hxvSPOyLyY8TVRqk,98059
112
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=9SHqGpoMDIysyFLzkZoAehbsroHQKYPctIwXmSqO4Zw,97888
113
113
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
114
114
  airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=958MMX6_ZOJUlDDdNr9Krosgi2bCKGx2Z765M2Woz18,5505
115
115
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
116
116
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
117
117
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
118
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=zfWJLlopJklDK1xvoUy2qMFcnSklmQ7wwEbdWVxYlw0,122917
118
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=a96vNwEc3J8S99KGtSt2G147leh8GADfkTrejVCBXzs,122064
119
119
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
120
120
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
121
121
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
122
122
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
123
123
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
124
124
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
125
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=pEz-P6D5TGtP4isNfmtakgKD95PqMLo6fasCVLIguWk,16760
125
+ airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=5bgXoJfBg_6i53krQMptAGb50XB5XoVfqQxKQhlLtBA,15383
126
126
  airbyte_cdk/sources/declarative/requesters/README.md,sha256=eL1I4iLkxaw7hJi9S9d18_XcRl-R8lUSjqBVJJzvXmg,2656
127
127
  airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
128
128
  airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
@@ -168,9 +168,9 @@ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC
168
168
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
169
169
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
170
170
  airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=kgnhVQxRlFqJs2-rDu2-QH-p-GzQU3nKmSp6_aq8u0s,24550
171
- airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
171
+ airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
172
172
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
173
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=d8tfDiDcJiunvN_Yalyfx5ISY5A-iIW3HbPwX2Hagh4,10702
173
+ airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=sa99VqU1U45fgZL2qEdw8ueX1tPTPfGxibQ-ZFePjSM,9361
174
174
  airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
175
175
  airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
176
176
  airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
@@ -201,7 +201,7 @@ airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=ddKQfUmk
201
201
  airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=01Nd4b7ERAbp-OZo_8rrAzFXWPTMwr02SnWiN17nx8Q,2363
202
202
  airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=j9T5TimfWFUz7nqsaj-83G3xWmDpsmeSbDnaUNmz0UM,5849
203
203
  airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
204
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=gXlZwnEKLWknnK_n7j14lANgR6vkqhlLJ-G3rRu-ox4,6897
204
+ airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=Dy610bcb2qedzKQ2OE08cYVQnkn7odBWcNx4sDVuy38,7129
205
205
  airbyte_cdk/sources/file_based/config/avro_format.py,sha256=NxTF96ewzn6HuhgodsY7Rpb-ybr1ZEWW5d4Vid64g5A,716
206
206
  airbyte_cdk/sources/file_based/config/csv_format.py,sha256=NWekkyT8dTwiVK0mwa_krQD4FJPHSDfILo8kPAg3-Vs,8006
207
207
  airbyte_cdk/sources/file_based/config/excel_format.py,sha256=9qAmTsT6SoVzNfNv0oBVkVCmiyqQuVAbfRKajjoa7Js,378
@@ -213,8 +213,8 @@ airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfra
213
213
  airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
214
214
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
215
215
  airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
216
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=Biv2QufYQtHZQCBZs4iCUpqTd82rk7xo8SDYkEeau3k,17616
217
- airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=e1KhgTh7mzvkBOz9DjLwzOsDwevrTmbxSYIcvhgWgGM,6856
216
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=ukkBHpnHH1U8ScLT6TDlxArhblMUakWf7QUIYEx_jgU,18413
217
+ airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=3tsXmh98GiYzfFXlEeUP9sKyNvQatrFkGVG1jm1RiQU,8333
218
218
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
219
219
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
220
220
  airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
@@ -240,7 +240,7 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
240
240
  airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
241
241
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
242
242
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
243
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
243
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=0uIIBsXsIm0rs6kKorurKE4donMVmt-oifufzW9fTJw,19443
244
244
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
245
245
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
246
246
  airbyte_cdk/sources/http_logger.py,sha256=l_1fk5YwdonZ1wvAsTwjj6d36fj2WrVraIAMj5jTQdM,1575
@@ -350,8 +350,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
350
350
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
351
351
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
352
352
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
353
- airbyte_cdk-6.26.0.dev3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
354
- airbyte_cdk-6.26.0.dev3.dist-info/METADATA,sha256=NKeOjHPNpBB8-7qZjp4_lr0l9vEAM6pOZwggBy2-DXE,6001
355
- airbyte_cdk-6.26.0.dev3.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
356
- airbyte_cdk-6.26.0.dev3.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
357
- airbyte_cdk-6.26.0.dev3.dist-info/RECORD,,
353
+ airbyte_cdk-6.26.0.dev4100.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
354
+ airbyte_cdk-6.26.0.dev4100.dist-info/METADATA,sha256=3usnEle_oFtHkTK8LGXfcT2ERqS9yx7jm9F4Cdhi6RA,6004
355
+ airbyte_cdk-6.26.0.dev4100.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
356
+ airbyte_cdk-6.26.0.dev4100.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
357
+ airbyte_cdk-6.26.0.dev4100.dist-info/RECORD,,