airbyte-cdk 6.38.5.dev0__py3-none-any.whl → 6.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,11 @@ class ConcurrentJobLimitReached(Exception):
17
17
  class JobTracker:
18
18
  def __init__(self, limit: int):
19
19
  self._jobs: Set[str] = set()
20
- self._limit = limit
20
+ if limit < 1:
21
+ LOGGER.warning(
22
+ f"The `max_concurrent_async_job_count` property is less than 1: {limit}. Setting to 1. Please update the source manifest to set a valid value."
23
+ )
24
+ self._limit = 1 if limit < 1 else limit
21
25
  self._lock = threading.Lock()
22
26
 
23
27
  def try_to_get_intent(self) -> str:
@@ -42,6 +42,10 @@ properties:
42
42
  "$ref": "#/definitions/ConcurrencyLevel"
43
43
  api_budget:
44
44
  "$ref": "#/definitions/HTTPAPIBudget"
45
+ max_concurrent_async_job_count:
46
+ title: Maximum Concurrent Asynchronous Jobs
47
+ description: Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.
48
+ type: integer
45
49
  metadata:
46
50
  type: object
47
51
  description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
@@ -109,7 +113,7 @@ definitions:
109
113
  description: List of transformations (path and corresponding value) that will be added to the record.
110
114
  type: array
111
115
  items:
112
- - "$ref": "#/definitions/AddedFieldDefinition"
116
+ "$ref": "#/definitions/AddedFieldDefinition"
113
117
  $parameters:
114
118
  type: object
115
119
  additionalProperties: true
@@ -1657,7 +1661,7 @@ definitions:
1657
1661
  description: List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).
1658
1662
  type: array
1659
1663
  items:
1660
- - type: string
1664
+ type: string
1661
1665
  interpolation_context:
1662
1666
  - config
1663
1667
  examples:
@@ -2073,7 +2077,7 @@ definitions:
2073
2077
  type: array
2074
2078
  default: []
2075
2079
  items:
2076
- - type: string
2080
+ type: string
2077
2081
  interpolation_context:
2078
2082
  - config
2079
2083
  key_pointer:
@@ -2081,7 +2085,7 @@ definitions:
2081
2085
  description: List of potentially nested fields describing the full path of the field key to extract.
2082
2086
  type: array
2083
2087
  items:
2084
- - type: string
2088
+ type: string
2085
2089
  interpolation_context:
2086
2090
  - config
2087
2091
  type_pointer:
@@ -2089,13 +2093,13 @@ definitions:
2089
2093
  description: List of potentially nested fields describing the full path of the field type to extract.
2090
2094
  type: array
2091
2095
  items:
2092
- - type: string
2096
+ type: string
2093
2097
  interpolation_context:
2094
2098
  - config
2095
2099
  types_mapping:
2096
2100
  type: array
2097
2101
  items:
2098
- - "$ref": "#/definitions/TypesMap"
2102
+ "$ref": "#/definitions/TypesMap"
2099
2103
  $parameters:
2100
2104
  type: object
2101
2105
  additionalProperties: true
@@ -2251,7 +2255,7 @@ definitions:
2251
2255
  description: A path to field that needs to be flattened.
2252
2256
  type: array
2253
2257
  items:
2254
- - type: string
2258
+ type: string
2255
2259
  examples:
2256
2260
  - ["data"]
2257
2261
  - ["data", "*", "field"]
@@ -3526,7 +3530,7 @@ definitions:
3526
3530
  description: A list of potentially nested fields indicating the full path where value will be added or updated.
3527
3531
  type: array
3528
3532
  items:
3529
- - type: string
3533
+ type: string
3530
3534
  interpolation_context:
3531
3535
  - config
3532
3536
  - components_values
@@ -3602,7 +3606,7 @@ definitions:
3602
3606
  description: A list of potentially nested fields indicating the full path in source config file where streams configs located.
3603
3607
  type: array
3604
3608
  items:
3605
- - type: string
3609
+ type: string
3606
3610
  interpolation_context:
3607
3611
  - parameters
3608
3612
  examples:
@@ -93,7 +93,10 @@ class ManifestDeclarativeSource(DeclarativeSource):
93
93
  self._constructor = (
94
94
  component_factory
95
95
  if component_factory
96
- else ModelToComponentFactory(emit_connector_builder_messages)
96
+ else ModelToComponentFactory(
97
+ emit_connector_builder_messages,
98
+ max_concurrent_async_job_count=source_config.get("max_concurrent_async_job_count"),
99
+ )
97
100
  )
98
101
  self._message_repository = self._constructor.get_message_repository()
99
102
  self._slice_logger: SliceLogger = (
@@ -1871,6 +1871,11 @@ class DeclarativeSource1(BaseModel):
1871
1871
  spec: Optional[Spec] = None
1872
1872
  concurrency_level: Optional[ConcurrencyLevel] = None
1873
1873
  api_budget: Optional[HTTPAPIBudget] = None
1874
+ max_concurrent_async_job_count: Optional[int] = Field(
1875
+ None,
1876
+ description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
1877
+ title="Maximum Concurrent Asynchronous Jobs",
1878
+ )
1874
1879
  metadata: Optional[Dict[str, Any]] = Field(
1875
1880
  None,
1876
1881
  description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -1898,6 +1903,11 @@ class DeclarativeSource2(BaseModel):
1898
1903
  spec: Optional[Spec] = None
1899
1904
  concurrency_level: Optional[ConcurrencyLevel] = None
1900
1905
  api_budget: Optional[HTTPAPIBudget] = None
1906
+ max_concurrent_async_job_count: Optional[int] = Field(
1907
+ None,
1908
+ description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.",
1909
+ title="Maximum Concurrent Asynchronous Jobs",
1910
+ )
1901
1911
  metadata: Optional[Dict[str, Any]] = Field(
1902
1912
  None,
1903
1913
  description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -503,7 +503,7 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_
503
503
  IncrementingCountStreamStateConverter,
504
504
  )
505
505
  from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
506
- from airbyte_cdk.sources.types import Config
506
+ from airbyte_cdk.sources.types import Config, ConnectionDefinition
507
507
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
508
508
 
509
509
  ComponentDefinition = Mapping[str, Any]
@@ -527,6 +527,7 @@ class ModelToComponentFactory:
527
527
  disable_resumable_full_refresh: bool = False,
528
528
  message_repository: Optional[MessageRepository] = None,
529
529
  connector_state_manager: Optional[ConnectorStateManager] = None,
530
+ max_concurrent_async_job_count: Optional[int] = None,
530
531
  ):
531
532
  self._init_mappings()
532
533
  self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
@@ -540,6 +541,7 @@ class ModelToComponentFactory:
540
541
  )
541
542
  self._connector_state_manager = connector_state_manager or ConnectorStateManager()
542
543
  self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
544
+ self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
543
545
 
544
546
  def _init_mappings(self) -> None:
545
547
  self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
@@ -2928,8 +2930,7 @@ class ModelToComponentFactory:
2928
2930
  job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
2929
2931
  job_repository,
2930
2932
  stream_slices,
2931
- JobTracker(1),
2932
- # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
2933
+ self._job_tracker,
2933
2934
  self._message_repository,
2934
2935
  has_bulk_parent=False,
2935
2936
  # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
@@ -48,6 +48,9 @@ from airbyte_cdk.sources.file_based.exceptions import (
48
48
  FileBasedErrorsCollector,
49
49
  FileBasedSourceError,
50
50
  )
51
+ from airbyte_cdk.sources.file_based.file_based_stream_permissions_reader import (
52
+ AbstractFileBasedStreamPermissionsReader,
53
+ )
51
54
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
52
55
  from airbyte_cdk.sources.file_based.file_types import default_parsers
53
56
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
@@ -100,8 +103,10 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
100
103
  cursor_cls: Type[
101
104
  Union[AbstractConcurrentFileBasedCursor, AbstractFileBasedCursor]
102
105
  ] = FileBasedConcurrentCursor,
106
+ stream_permissions_reader: Optional[AbstractFileBasedStreamPermissionsReader] = None,
103
107
  ):
104
108
  self.stream_reader = stream_reader
109
+ self.stream_permissions_reader = stream_permissions_reader
105
110
  self.spec_class = spec_class
106
111
  self.config = config
107
112
  self.catalog = catalog
@@ -234,6 +239,8 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
234
239
  try:
235
240
  parsed_config = self._get_parsed_config(config)
236
241
  self.stream_reader.config = parsed_config
242
+ if self.stream_permissions_reader:
243
+ self.stream_permissions_reader.config = parsed_config
237
244
  streams: List[Stream] = []
238
245
  for stream_config in parsed_config.streams:
239
246
  # Like state_manager, `catalog_stream` may be None during `check`
@@ -337,9 +344,23 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
337
344
  preserve_directory_structure=preserve_directory_structure(parsed_config),
338
345
  )
339
346
 
347
+ def _ensure_permissions_reader_available(self) -> None:
348
+ """
349
+ Validates that a stream permissions reader is available.
350
+ Raises a ValueError if the reader is not provided.
351
+ """
352
+ if not self.stream_permissions_reader:
353
+ raise ValueError(
354
+ "Stream permissions reader is required for streams that use permissions transfer mode."
355
+ )
356
+
340
357
  def _make_permissions_stream(
341
358
  self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
342
359
  ) -> AbstractFileBasedStream:
360
+ """
361
+ Creates a stream that reads permissions from files.
362
+ """
363
+ self._ensure_permissions_reader_available()
343
364
  return PermissionsFileBasedStream(
344
365
  config=stream_config,
345
366
  catalog_schema=self.stream_schemas.get(stream_config.name),
@@ -350,6 +371,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
350
371
  validation_policy=self._validate_and_get_validation_policy(stream_config),
351
372
  errors_collector=self.errors_collector,
352
373
  cursor=cursor,
374
+ stream_permissions_reader=self.stream_permissions_reader, # type: ignore
353
375
  )
354
376
 
355
377
  def _make_file_based_stream(
@@ -370,9 +392,10 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
370
392
  def _make_identities_stream(
371
393
  self,
372
394
  ) -> Stream:
395
+ self._ensure_permissions_reader_available()
373
396
  return FileIdentitiesStream(
374
397
  catalog_schema=self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME),
375
- stream_reader=self.stream_reader,
398
+ stream_permissions_reader=self.stream_permissions_reader, # type: ignore
376
399
  discovery_policy=self.discovery_policy,
377
400
  errors_collector=self.errors_collector,
378
401
  )
@@ -0,0 +1,123 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ from abc import ABC, abstractmethod
7
+ from typing import Any, Dict, Iterable, Optional
8
+
9
+ from airbyte_cdk.sources.file_based import AbstractFileBasedSpec
10
+ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
11
+
12
+
13
+ class AbstractFileBasedStreamPermissionsReader(ABC):
14
+ """
15
+ This class is responsible for reading file permissions and Identities from a source.
16
+ """
17
+
18
+ def __init__(self) -> None:
19
+ self._config = None
20
+
21
+ @property
22
+ def config(self) -> Optional[AbstractFileBasedSpec]:
23
+ return self._config
24
+
25
+ @config.setter
26
+ @abstractmethod
27
+ def config(self, value: AbstractFileBasedSpec) -> None:
28
+ """
29
+ FileBasedSource reads the config from disk and parses it, and once parsed, the source sets the config on its StreamReader.
30
+
31
+ Note: FileBasedSource only requires the keys defined in the abstract config, whereas concrete implementations of StreamReader
32
+ will require keys that (for example) allow it to authenticate with the 3rd party.
33
+
34
+ Therefore, concrete implementations of AbstractFileBasedStreamPermissionsReader's's config setter should assert that `value` is of the correct
35
+ config type for that type of StreamReader.
36
+ """
37
+ ...
38
+
39
+ @abstractmethod
40
+ def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
41
+ """
42
+ This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it
43
+
44
+ e.g.
45
+ def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger):
46
+ api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
47
+ result = api_conn.get_file_permissions_info(file.id)
48
+ return MyPermissionsModel(
49
+ id=result["id"],
50
+ access_control_list = result["access_control_list"],
51
+ is_public = result["is_public"],
52
+ ).dict()
53
+ """
54
+ ...
55
+
56
+ @abstractmethod
57
+ def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
58
+ """
59
+ This function should return the Identities in a determined "space" or "domain" where the file metadata (ACLs) are fetched and ACLs items (Identities) exists.
60
+
61
+ e.g.
62
+ def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
63
+ api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
64
+ users_api = api_conn.users()
65
+ groups_api = api_conn.groups()
66
+ members_api = self.google_directory_service.members()
67
+ for user in users_api.list():
68
+ yield my_identity_model(id=user.id, name=user.name, email_address=user.email, type="user").dict()
69
+ for group in groups_api.list():
70
+ group_obj = my_identity_model(id=group.id, name=groups.name, email_address=user.email, type="group").dict()
71
+ for member in members_api.list(group=group):
72
+ group_obj.member_email_addresses = group_obj.member_email_addresses or []
73
+ group_obj.member_email_addresses.append(member.email)
74
+ yield group_obj.dict()
75
+ """
76
+ ...
77
+
78
+ @property
79
+ @abstractmethod
80
+ def file_permissions_schema(self) -> Dict[str, Any]:
81
+ """
82
+ This function should return the permissions schema for file permissions stream.
83
+
84
+ e.g.
85
+ def file_permissions_schema(self) -> Dict[str, Any]:
86
+ # you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
87
+ return {
88
+ "type": "object",
89
+ "properties": {
90
+ "id": { "type": "string" },
91
+ "file_path": { "type": "string" },
92
+ "access_control_list": {
93
+ "type": "array",
94
+ "items": { "type": "string" }
95
+ },
96
+ "publicly_accessible": { "type": "boolean" }
97
+ }
98
+ }
99
+ """
100
+ ...
101
+
102
+ @property
103
+ @abstractmethod
104
+ def identities_schema(self) -> Dict[str, Any]:
105
+ """
106
+ This function should return the identities schema for file identity stream.
107
+
108
+ e.g.
109
+ def identities_schema(self) -> Dict[str, Any]:
110
+ # you can also follow the pattern we have for python connectors and have a json file and read from there e.g. schemas/identities.json
111
+ return {
112
+ "type": "object",
113
+ "properties": {
114
+ "id": { "type": "string" },
115
+ "remote_id": { "type": "string" },
116
+ "name": { "type": ["null", "string"] },
117
+ "email_address": { "type": ["null", "string"] },
118
+ "member_email_addresses": { "type": ["null", "array"] },
119
+ "type": { "type": "string" },
120
+ }
121
+ }
122
+ """
123
+ ...
@@ -184,97 +184,3 @@ class AbstractFileBasedStreamReader(ABC):
184
184
  makedirs(path.dirname(local_file_path), exist_ok=True)
185
185
  absolute_file_path = path.abspath(local_file_path)
186
186
  return [file_relative_path, local_file_path, absolute_file_path]
187
-
188
- @abstractmethod
189
- def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
190
- """
191
- This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it
192
-
193
- e.g.
194
- def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger):
195
- api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
196
- result = api_conn.get_file_permissions_info(file.id)
197
- return MyPermissionsModel(
198
- id=result["id"],
199
- access_control_list = result["access_control_list"],
200
- is_public = result["is_public"],
201
- ).dict()
202
- """
203
- raise NotImplementedError(
204
- f"{self.__class__.__name__} does not implement get_file_acl_permissions(). To support ACL permissions, implement this method and update file_permissions_schema."
205
- )
206
-
207
- @abstractmethod
208
- def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
209
- """
210
- This function should return the Identities in a determined "space" or "domain" where the file metadata (ACLs) are fetched and ACLs items (Identities) exists.
211
-
212
- e.g.
213
- def load_identity_groups(self, logger: logging.Logger) -> Dict[str, Any]:
214
- api_conn = some_api.conn(credentials=SOME_CREDENTIALS)
215
- users_api = api_conn.users()
216
- groups_api = api_conn.groups()
217
- members_api = self.google_directory_service.members()
218
- for user in users_api.list():
219
- yield my_identity_model(id=user.id, name=user.name, email_address=user.email, type="user").dict()
220
- for group in groups_api.list():
221
- group_obj = my_identity_model(id=group.id, name=groups.name, email_address=user.email, type="group").dict()
222
- for member in members_api.list(group=group):
223
- group_obj.member_email_addresses = group_obj.member_email_addresses or []
224
- group_obj.member_email_addresses.append(member.email)
225
- yield group_obj.dict()
226
- """
227
- raise NotImplementedError(
228
- f"{self.__class__.__name__} does not implement load_identity_groups(). To support identities, implement this method and update identities_schema."
229
- )
230
-
231
- @property
232
- @abstractmethod
233
- def file_permissions_schema(self) -> Dict[str, Any]:
234
- """
235
- This function should return the permissions schema for file permissions stream.
236
-
237
- e.g.
238
- def file_permissions_schema(self) -> Dict[str, Any]:
239
- # you can also follow the patter we have for python connectors and have a json file and read from there e.g. schemas/identities.json
240
- return {
241
- "type": "object",
242
- "properties": {
243
- "id": { "type": "string" },
244
- "file_path": { "type": "string" },
245
- "access_control_list": {
246
- "type": "array",
247
- "items": { "type": "string" }
248
- },
249
- "publicly_accessible": { "type": "boolean" }
250
- }
251
- }
252
- """
253
- raise NotImplementedError(
254
- f"{self.__class__.__name__} does not implement file_permissions_schema, please return json schema for your permissions streams."
255
- )
256
-
257
- @property
258
- @abstractmethod
259
- def identities_schema(self) -> Dict[str, Any]:
260
- """
261
- This function should return the identities schema for file identity stream.
262
-
263
- e.g.
264
- def identities_schema(self) -> Dict[str, Any]:
265
- # you can also follow the patter we have for python connectors and have a json file and read from there e.g. schemas/identities.json
266
- return {
267
- "type": "object",
268
- "properties": {
269
- "id": { "type": "string" },
270
- "remote_id": { "type": "string" },
271
- "name": { "type": ["null", "string"] },
272
- "email_address": { "type": ["null", "string"] },
273
- "member_email_addresses": { "type": ["null", "array"] },
274
- "type": { "type": "string" },
275
- }
276
- }
277
- """
278
- raise NotImplementedError(
279
- f"{self.__class__.__name__} does not implement identities_schema, please return json schema for your identities stream."
280
- )
@@ -356,10 +356,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
356
356
  if "null" not in v:
357
357
  schema[k] = ["null"] + v
358
358
  elif v != "null":
359
- if isinstance(v, (str, list)):
360
- schema[k] = ["null", v]
361
- else:
362
- DefaultFileBasedStream._fill_nulls(v)
359
+ schema[k] = ["null", v]
363
360
  else:
364
361
  DefaultFileBasedStream._fill_nulls(v)
365
362
  elif isinstance(schema, list):
@@ -8,7 +8,9 @@ from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional
8
8
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
9
9
  from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
10
10
  from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector
11
- from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
11
+ from airbyte_cdk.sources.file_based.file_based_stream_permissions_reader import (
12
+ AbstractFileBasedStreamPermissionsReader,
13
+ )
12
14
  from airbyte_cdk.sources.streams.core import JsonSchema
13
15
  from airbyte_cdk.sources.streams.permissions.identities_stream import IdentitiesStream
14
16
 
@@ -24,13 +26,13 @@ class FileIdentitiesStream(IdentitiesStream):
24
26
  def __init__(
25
27
  self,
26
28
  catalog_schema: Optional[Mapping[str, Any]],
27
- stream_reader: AbstractFileBasedStreamReader,
29
+ stream_permissions_reader: AbstractFileBasedStreamPermissionsReader,
28
30
  discovery_policy: AbstractDiscoveryPolicy,
29
31
  errors_collector: FileBasedErrorsCollector,
30
32
  ) -> None:
31
33
  super().__init__()
32
34
  self.catalog_schema = catalog_schema
33
- self.stream_reader = stream_reader
35
+ self.stream_permissions_reader = stream_permissions_reader
34
36
  self._discovery_policy = discovery_policy
35
37
  self.errors_collector = errors_collector
36
38
  self._cursor: MutableMapping[str, Any] = {}
@@ -40,8 +42,8 @@ class FileIdentitiesStream(IdentitiesStream):
40
42
  return None
41
43
 
42
44
  def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
43
- return self.stream_reader.load_identity_groups(logger=self.logger)
45
+ return self.stream_permissions_reader.load_identity_groups(logger=self.logger)
44
46
 
45
47
  @cache
46
48
  def get_json_schema(self) -> JsonSchema:
47
- return self.stream_reader.identities_schema
49
+ return self.stream_permissions_reader.identities_schema
@@ -7,6 +7,9 @@ from typing import Any, Dict, Iterable
7
7
 
8
8
  from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
9
9
  from airbyte_cdk.models import Type as MessageType
10
+ from airbyte_cdk.sources.file_based.file_based_stream_permissions_reader import (
11
+ AbstractFileBasedStreamPermissionsReader,
12
+ )
10
13
  from airbyte_cdk.sources.file_based.stream import DefaultFileBasedStream
11
14
  from airbyte_cdk.sources.file_based.types import StreamSlice
12
15
  from airbyte_cdk.sources.streams.core import JsonSchema
@@ -26,10 +29,16 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
26
29
  and schema definition, while this class handles the streaming interface.
27
30
  """
28
31
 
32
+ def __init__(
33
+ self, stream_permissions_reader: AbstractFileBasedStreamPermissionsReader, **kwargs: Any
34
+ ):
35
+ super().__init__(**kwargs)
36
+ self.stream_permissions_reader = stream_permissions_reader
37
+
29
38
  def _filter_schema_invalid_properties(
30
39
  self, configured_catalog_json_schema: Dict[str, Any]
31
40
  ) -> Dict[str, Any]:
32
- return self.stream_reader.file_permissions_schema
41
+ return self.stream_permissions_reader.file_permissions_schema
33
42
 
34
43
  def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
35
44
  """
@@ -40,7 +49,7 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
40
49
  no_permissions = False
41
50
  file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
42
51
  try:
43
- permissions_record = self.stream_reader.get_file_acl_permissions(
52
+ permissions_record = self.stream_permissions_reader.get_file_acl_permissions(
44
53
  file, logger=self.logger
45
54
  )
46
55
  if not permissions_record:
@@ -82,4 +91,4 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
82
91
  Returns:
83
92
  The file permissions schema that defines the structure of permission records
84
93
  """
85
- return self.stream_reader.file_permissions_schema
94
+ return self.stream_permissions_reader.file_permissions_schema
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.38.5.dev0
3
+ Version: 6.39.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -50,7 +50,7 @@ airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4G
50
50
  airbyte_cdk/sources/declarative/async_job/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  airbyte_cdk/sources/declarative/async_job/job.py,sha256=V4Z6NohXwTlOavDbD-tUUQxOr7Lzpb_r4tRC64AfvDE,1702
52
52
  airbyte_cdk/sources/declarative/async_job/job_orchestrator.py,sha256=nUimSsq1nbEe3UPqsVC9mj8Zh2GYptJUZHQSVTbpWIc,21026
53
- airbyte_cdk/sources/declarative/async_job/job_tracker.py,sha256=SQt21SftVgP7RUCQ8LA2vaCn-YEbyX1BnhibfTX9oaE,2321
53
+ airbyte_cdk/sources/declarative/async_job/job_tracker.py,sha256=oAaqKxj5dGKeF5wkqiOZbu5gW6JvtaROxirDU2KqT1o,2565
54
54
  airbyte_cdk/sources/declarative/async_job/repository.py,sha256=2OkWiZp5IKTOi_SIpP1U-Rw3gH36LBy_a8CgXoENTtg,1044
55
55
  airbyte_cdk/sources/declarative/async_job/status.py,sha256=mkExR-uOAO1ckUnclaUOa74l2N9CdhLbVFM6KDoBgBM,715
56
56
  airbyte_cdk/sources/declarative/async_job/timer.py,sha256=Fb8P72CQ7jIzJyzMSSNuBf2vt8bmrg9SrfmNxKwph2A,1242
@@ -71,7 +71,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=rAp-sgld
71
71
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
72
72
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
73
73
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
74
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=n8hJVquDj00_VS_I0B2QgwYNcNcfsVZdkajAKArcOHU,147487
74
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=2kFMQC2TpM_dmNJe0vYtez5XzdFy4nnWo1WowqKG1pA,148008
75
75
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
76
76
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=venZjfpvtqr3oFSuvMBWtn4h9ayLhD4L65ACuXCDZ64,10445
77
77
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
@@ -109,18 +109,18 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkH
109
109
  airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
110
110
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=UQeuS4Vpyp4hlOn-R3tRyeBX0e9IoV6jQ6gH-Jz8lY0,7182
111
111
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=HQKHKnjE17zKoPn27ZpTpugRZZQSaof4GVzUUZaV2eE,5081
112
- airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=TN6GCgLXaWDONTaJwQ3A5ELqC-sxwKz-UYSraJYB-dI,17078
112
+ airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=DJuLm_5iG66cyD16mRWG_rwmgNIORWSkFLxstC93Tc8,17209
113
113
  airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
115
115
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
116
116
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
117
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=DfbPi512ovaBSWDICJfjIkC3pXDn2aNr1BP-eiLOLyA,103556
117
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=IbSrniMEvDhmiXtArtBpC2ie5pIC0tHh1JKnBSe3EcM,104712
118
118
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
119
119
  airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=jDw_TttD3_hpfevXOH-0Ws0eRuqt6wvED0BqosGPRjI,5938
120
120
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
121
121
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
122
122
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
123
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=4FEIBgkAn85qmOEjmi8rRPBERdktNpOFjXGbw0Fsau8,142441
123
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=2VkO2gqw4j8sqmuIB-4JKhle4NJrFJy586M9YPatDc0,142496
124
124
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
125
125
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
126
126
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -214,8 +214,9 @@ airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfra
214
214
  airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
215
215
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
216
216
  airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
217
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=JXfwc9KaW7PvjAbm2GJ7Ra3DJnCZH4KaE3WytYvtM1Q,18925
218
- airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=d2UZ3C8M-A591KvBvg8kDpVdpox0rKVlRhVy5bi-auc,11209
217
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=bIsut7ivHcl7YPO9cygDn0to23MRLu1Sym7jSr0Iy9A,20051
218
+ airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py,sha256=4e7FXqQ9hueacexC0SyrZyjF8oREYHza8pKF9CgKbD8,5050
219
+ airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=0cmppYO3pZlFiJrs5oorF4JXv4ErhOeEMrdLG7P-Gdk,6742
219
220
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
220
221
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
221
222
  airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
@@ -241,9 +242,9 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
241
242
  airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
242
243
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
243
244
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
244
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=jyJLu2BUCYWKqrqD0ZUFxnrD0qybny7KbzKznxjIIpM,18199
245
- airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=DwgNU-jDp5vZ_WloQSUzBciDnAFMo8bXPjXpQx5-eko,1790
246
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=i0Jn0zuAPomLa4pHSu9TQ3gAN5xXhNzPTYVwUDiDEyE,3523
245
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
246
+ airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=FZH83Geoy3K3nwUk2VVNJERFcXUTnl-4XljjucUM23s,1893
247
+ airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=ke82qgm7snOlQTDx94Lqsc0cDkHWi3OJDTrPxffpFqc,3914
247
248
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
248
249
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
249
250
  airbyte_cdk/sources/http_logger.py,sha256=H93kPAujHhPmXNX0JSFG3D-SL6yEFA5PtKot9Hu3TYA,1690
@@ -357,9 +358,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
357
358
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
358
359
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
359
360
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
360
- airbyte_cdk-6.38.5.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
361
- airbyte_cdk-6.38.5.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
362
- airbyte_cdk-6.38.5.dev0.dist-info/METADATA,sha256=gwRY6sWvIeoJ_Jmemy4eDMpbZrOJaZtlPDiIygQTn_M,6018
363
- airbyte_cdk-6.38.5.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
364
- airbyte_cdk-6.38.5.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
365
- airbyte_cdk-6.38.5.dev0.dist-info/RECORD,,
361
+ airbyte_cdk-6.39.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
362
+ airbyte_cdk-6.39.0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
363
+ airbyte_cdk-6.39.0.dist-info/METADATA,sha256=7zM8BE2_CcIFxmvyC_TVNF7Dy9tvKFOnjnlOzCwpc40,6013
364
+ airbyte_cdk-6.39.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
365
+ airbyte_cdk-6.39.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
366
+ airbyte_cdk-6.39.0.dist-info/RECORD,,