airbyte-cdk 6.8.1rc6__py3-none-any.whl → 6.8.1rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +26 -18
- airbyte_cdk/sources/streams/http/http_client.py +8 -4
- {airbyte_cdk-6.8.1rc6.dist-info → airbyte_cdk-6.8.1rc8.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.8.1rc6.dist-info → airbyte_cdk-6.8.1rc8.dist-info}/RECORD +7 -7
- {airbyte_cdk-6.8.1rc6.dist-info → airbyte_cdk-6.8.1rc8.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.8.1rc6.dist-info → airbyte_cdk-6.8.1rc8.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.8.1rc6.dist-info → airbyte_cdk-6.8.1rc8.dist-info}/entry_points.txt +0 -0
@@ -56,8 +56,9 @@ from airbyte_cdk.sources.types import Config, StreamState
|
|
56
56
|
|
57
57
|
|
58
58
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
59
|
-
# By default, we defer to a value of
|
60
|
-
|
59
|
+
# By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
|
60
|
+
# because it has hit the limit of futures but no partition reader is consuming them.
|
61
|
+
SINGLE_THREADED_CONCURRENCY_LEVEL = 2
|
61
62
|
|
62
63
|
def __init__(
|
63
64
|
self,
|
@@ -78,6 +79,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
78
79
|
emit_connector_builder_messages=emit_connector_builder_messages,
|
79
80
|
disable_resumable_full_refresh=True,
|
80
81
|
)
|
82
|
+
self._config = config
|
83
|
+
self._concurrent_streams: Optional[List[AbstractStream]] = None
|
84
|
+
self._synchronous_streams: Optional[List[Stream]] = None
|
81
85
|
|
82
86
|
super().__init__(
|
83
87
|
source_config=source_config,
|
@@ -88,21 +92,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
88
92
|
|
89
93
|
self._state = state
|
90
94
|
|
91
|
-
self._concurrent_streams: Optional[List[AbstractStream]]
|
92
|
-
self._synchronous_streams: Optional[List[Stream]]
|
93
|
-
|
94
|
-
# If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
|
95
|
-
# they might depend on it. Ideally we want to have a static method on this class to get the spec without
|
96
|
-
# any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
|
97
|
-
# for our future improvements to the CDK.
|
98
|
-
if config:
|
99
|
-
self._concurrent_streams, self._synchronous_streams = self._group_streams(
|
100
|
-
config=config or {}
|
101
|
-
)
|
102
|
-
else:
|
103
|
-
self._concurrent_streams = None
|
104
|
-
self._synchronous_streams = None
|
105
|
-
|
106
95
|
concurrency_level_from_manifest = self._source_config.get("concurrency_level")
|
107
96
|
if concurrency_level_from_manifest:
|
108
97
|
concurrency_level_component = self._constructor.create_component(
|
@@ -121,7 +110,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
121
110
|
) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a deadlock during startup
|
122
111
|
else:
|
123
112
|
concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
|
124
|
-
initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
|
113
|
+
initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL // 2
|
125
114
|
|
126
115
|
self._concurrent_source = ConcurrentSource.create(
|
127
116
|
num_workers=concurrency_level,
|
@@ -131,6 +120,19 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
131
120
|
message_repository=self.message_repository, # type: ignore # message_repository is always instantiated with a value by factory
|
132
121
|
)
|
133
122
|
|
123
|
+
def _actually_group(self) -> None:
|
124
|
+
# If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
|
125
|
+
# they might depend on it. Ideally we want to have a static method on this class to get the spec without
|
126
|
+
# any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
|
127
|
+
# for our future improvements to the CDK.
|
128
|
+
if self._config:
|
129
|
+
self._concurrent_streams, self._synchronous_streams = self._group_streams(
|
130
|
+
config=self._config or {}
|
131
|
+
)
|
132
|
+
else:
|
133
|
+
self._concurrent_streams = None
|
134
|
+
self._synchronous_streams = None
|
135
|
+
|
134
136
|
def read(
|
135
137
|
self,
|
136
138
|
logger: logging.Logger,
|
@@ -140,6 +142,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
140
142
|
) -> Iterator[AirbyteMessage]:
|
141
143
|
# ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
|
142
144
|
# streams must be saved so that they can be removed from the catalog before starting synchronous streams
|
145
|
+
if self._concurrent_streams is None:
|
146
|
+
self._actually_group()
|
147
|
+
|
143
148
|
if self._concurrent_streams:
|
144
149
|
concurrent_stream_names = set(
|
145
150
|
[concurrent_stream.name for concurrent_stream in self._concurrent_streams]
|
@@ -165,6 +170,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
165
170
|
yield from super().read(logger, config, filtered_catalog, state)
|
166
171
|
|
167
172
|
def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
|
173
|
+
if self._concurrent_streams is None:
|
174
|
+
self._actually_group()
|
175
|
+
|
168
176
|
concurrent_streams = self._concurrent_streams or []
|
169
177
|
synchronous_streams = self._synchronous_streams or []
|
170
178
|
return AirbyteCatalog(
|
@@ -95,6 +95,7 @@ class HttpClient:
|
|
95
95
|
):
|
96
96
|
self._name = name
|
97
97
|
self._api_budget: APIBudget = api_budget or APIBudget(policies=[])
|
98
|
+
self._logger = logger
|
98
99
|
if session:
|
99
100
|
self._session = session
|
100
101
|
else:
|
@@ -108,7 +109,6 @@ class HttpClient:
|
|
108
109
|
)
|
109
110
|
if isinstance(authenticator, AuthBase):
|
110
111
|
self._session.auth = authenticator
|
111
|
-
self._logger = logger
|
112
112
|
self._error_handler = error_handler or HttpStatusErrorHandler(self._logger)
|
113
113
|
if backoff_strategy is not None:
|
114
114
|
if isinstance(backoff_strategy, list):
|
@@ -142,8 +142,9 @@ class HttpClient:
|
|
142
142
|
if cache_dir:
|
143
143
|
sqlite_path = str(Path(cache_dir) / self.cache_filename)
|
144
144
|
else:
|
145
|
+
self._logger.info("Using memory for cache") # TODO: remove
|
145
146
|
sqlite_path = "file::memory:?cache=shared"
|
146
|
-
backend = SkipFailureSQLiteCache(sqlite_path)
|
147
|
+
backend = SkipFailureSQLiteCache(self._name, sqlite_path) # TODO maybe add a busy timeout
|
147
148
|
return CachedLimiterSession(
|
148
149
|
sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
|
149
150
|
) # type: ignore # there are no typeshed stubs for requests_cache
|
@@ -541,6 +542,7 @@ class SkipFailureSQLiteDict(requests_cache.backends.sqlite.SQLiteDict):
|
|
541
542
|
class SkipFailureSQLiteCache(requests_cache.backends.sqlite.SQLiteCache):
|
542
543
|
def __init__( # type: ignore # ignoring as lib is not typed
|
543
544
|
self,
|
545
|
+
table_name="response",
|
544
546
|
db_path="http_cache",
|
545
547
|
serializer=None,
|
546
548
|
**kwargs,
|
@@ -548,11 +550,13 @@ class SkipFailureSQLiteCache(requests_cache.backends.sqlite.SQLiteCache):
|
|
548
550
|
super().__init__(db_path, serializer, **kwargs)
|
549
551
|
skwargs = {"serializer": serializer, **kwargs} if serializer else kwargs
|
550
552
|
self.responses: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
|
551
|
-
db_path, table_name=
|
553
|
+
db_path, table_name=table_name, fast_save=True, wal=True, **skwargs
|
552
554
|
)
|
553
555
|
self.redirects: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
|
554
556
|
db_path,
|
555
|
-
table_name="
|
557
|
+
table_name=f"redirects_{table_name}",
|
558
|
+
fast_save=True,
|
559
|
+
wal=True,
|
556
560
|
lock=self.responses._lock,
|
557
561
|
serializer=None,
|
558
562
|
**kwargs,
|
@@ -62,7 +62,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
|
|
62
62
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
63
63
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
64
64
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
65
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
65
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=rpWo7poxbNYJHPeV2tfiodJ4jeS6-5nfXV31CuIjjw4,23903
|
66
66
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
67
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
68
68
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=8VZJP18eJLabSPP1XBSPDaagUBG6q1ynIiPJy3rE2mc,5344
|
@@ -273,7 +273,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
|
|
273
273
|
airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
|
274
274
|
airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
|
275
275
|
airbyte_cdk/sources/streams/http/http.py,sha256=h0bq4arzMeJsR-5HZNfGYXtZhgVvLbW6myi9fuhMayU,28467
|
276
|
-
airbyte_cdk/sources/streams/http/http_client.py,sha256=
|
276
|
+
airbyte_cdk/sources/streams/http/http_client.py,sha256=mBv9ck2mUv4kvVozPmHN5gHVLIgZ9e11trUsTjskPiM,23308
|
277
277
|
airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
|
278
278
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
279
279
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
|
@@ -330,8 +330,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
330
330
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=LVc9KbtMeV_z99jWo0Ou8u4l6eBJ0BWNhxj4zrrGKRs,763
|
331
331
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
332
332
|
airbyte_cdk/utils/traced_exception.py,sha256=a6q51tBS3IdtefuOiL1eBwSmnNAXfjFMlMjSIQ_Tl-o,6165
|
333
|
-
airbyte_cdk-6.8.
|
334
|
-
airbyte_cdk-6.8.
|
335
|
-
airbyte_cdk-6.8.
|
336
|
-
airbyte_cdk-6.8.
|
337
|
-
airbyte_cdk-6.8.
|
333
|
+
airbyte_cdk-6.8.1rc8.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
334
|
+
airbyte_cdk-6.8.1rc8.dist-info/METADATA,sha256=cQWW8Fk6YGrxN-sqIW9PKhXR2pKGkURuYFUhsYgP3po,13522
|
335
|
+
airbyte_cdk-6.8.1rc8.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
336
|
+
airbyte_cdk-6.8.1rc8.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
337
|
+
airbyte_cdk-6.8.1rc8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|