airbyte-cdk 6.8.1rc6__py3-none-any.whl → 6.8.1rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,8 +56,9 @@ from airbyte_cdk.sources.types import Config, StreamState
56
56
 
57
57
 
58
58
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
59
- # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
60
- SINGLE_THREADED_CONCURRENCY_LEVEL = 1
59
+ # By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
60
+ # because it has hit the limit of futures but no partition reader is consuming them.
61
+ SINGLE_THREADED_CONCURRENCY_LEVEL = 2
61
62
 
62
63
  def __init__(
63
64
  self,
@@ -78,6 +79,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
78
79
  emit_connector_builder_messages=emit_connector_builder_messages,
79
80
  disable_resumable_full_refresh=True,
80
81
  )
82
+ self._config = config
83
+ self._concurrent_streams: Optional[List[AbstractStream]] = None
84
+ self._synchronous_streams: Optional[List[Stream]] = None
81
85
 
82
86
  super().__init__(
83
87
  source_config=source_config,
@@ -88,21 +92,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
88
92
 
89
93
  self._state = state
90
94
 
91
- self._concurrent_streams: Optional[List[AbstractStream]]
92
- self._synchronous_streams: Optional[List[Stream]]
93
-
94
- # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
95
- # they might depend on it. Ideally we want to have a static method on this class to get the spec without
96
- # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
97
- # for our future improvements to the CDK.
98
- if config:
99
- self._concurrent_streams, self._synchronous_streams = self._group_streams(
100
- config=config or {}
101
- )
102
- else:
103
- self._concurrent_streams = None
104
- self._synchronous_streams = None
105
-
106
95
  concurrency_level_from_manifest = self._source_config.get("concurrency_level")
107
96
  if concurrency_level_from_manifest:
108
97
  concurrency_level_component = self._constructor.create_component(
@@ -121,7 +110,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
121
110
  ) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a deadlock during startup
122
111
  else:
123
112
  concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
124
- initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
113
+ initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL // 2
125
114
 
126
115
  self._concurrent_source = ConcurrentSource.create(
127
116
  num_workers=concurrency_level,
@@ -131,6 +120,19 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
131
120
  message_repository=self.message_repository, # type: ignore # message_repository is always instantiated with a value by factory
132
121
  )
133
122
 
123
+ def _actually_group(self) -> None:
124
+ # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
125
+ # they might depend on it. Ideally we want to have a static method on this class to get the spec without
126
+ # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
127
+ # for our future improvements to the CDK.
128
+ if self._config:
129
+ self._concurrent_streams, self._synchronous_streams = self._group_streams(
130
+ config=self._config or {}
131
+ )
132
+ else:
133
+ self._concurrent_streams = None
134
+ self._synchronous_streams = None
135
+
134
136
  def read(
135
137
  self,
136
138
  logger: logging.Logger,
@@ -140,6 +142,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
140
142
  ) -> Iterator[AirbyteMessage]:
141
143
  # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
142
144
  # streams must be saved so that they can be removed from the catalog before starting synchronous streams
145
+ if self._concurrent_streams is None:
146
+ self._actually_group()
147
+
143
148
  if self._concurrent_streams:
144
149
  concurrent_stream_names = set(
145
150
  [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
@@ -165,6 +170,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
165
170
  yield from super().read(logger, config, filtered_catalog, state)
166
171
 
167
172
  def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
173
+ if self._concurrent_streams is None:
174
+ self._actually_group()
175
+
168
176
  concurrent_streams = self._concurrent_streams or []
169
177
  synchronous_streams = self._synchronous_streams or []
170
178
  return AirbyteCatalog(
@@ -95,6 +95,7 @@ class HttpClient:
95
95
  ):
96
96
  self._name = name
97
97
  self._api_budget: APIBudget = api_budget or APIBudget(policies=[])
98
+ self._logger = logger
98
99
  if session:
99
100
  self._session = session
100
101
  else:
@@ -108,7 +109,6 @@ class HttpClient:
108
109
  )
109
110
  if isinstance(authenticator, AuthBase):
110
111
  self._session.auth = authenticator
111
- self._logger = logger
112
112
  self._error_handler = error_handler or HttpStatusErrorHandler(self._logger)
113
113
  if backoff_strategy is not None:
114
114
  if isinstance(backoff_strategy, list):
@@ -142,8 +142,9 @@ class HttpClient:
142
142
  if cache_dir:
143
143
  sqlite_path = str(Path(cache_dir) / self.cache_filename)
144
144
  else:
145
+ self._logger.info("Using memory for cache") # TODO: remove
145
146
  sqlite_path = "file::memory:?cache=shared"
146
- backend = SkipFailureSQLiteCache(sqlite_path)
147
+ backend = SkipFailureSQLiteCache(self._name, sqlite_path) # TODO maybe add a busy timeout
147
148
  return CachedLimiterSession(
148
149
  sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
149
150
  ) # type: ignore # there are no typeshed stubs for requests_cache
@@ -541,6 +542,7 @@ class SkipFailureSQLiteDict(requests_cache.backends.sqlite.SQLiteDict):
541
542
  class SkipFailureSQLiteCache(requests_cache.backends.sqlite.SQLiteCache):
542
543
  def __init__( # type: ignore # ignoring as lib is not typed
543
544
  self,
545
+ table_name="response",
544
546
  db_path="http_cache",
545
547
  serializer=None,
546
548
  **kwargs,
@@ -548,11 +550,13 @@ class SkipFailureSQLiteCache(requests_cache.backends.sqlite.SQLiteCache):
548
550
  super().__init__(db_path, serializer, **kwargs)
549
551
  skwargs = {"serializer": serializer, **kwargs} if serializer else kwargs
550
552
  self.responses: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
551
- db_path, table_name="responses", **skwargs
553
+ db_path, table_name=table_name, fast_save=True, wal=True, **skwargs
552
554
  )
553
555
  self.redirects: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
554
556
  db_path,
555
- table_name="redirects",
557
+ table_name=f"redirects_{table_name}",
558
+ fast_save=True,
559
+ wal=True,
556
560
  lock=self.responses._lock,
557
561
  serializer=None,
558
562
  **kwargs,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.8.1rc6
3
+ Version: 6.8.1rc8
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -62,7 +62,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=4bpS63fz2K4mLPF33eG1xXgGRzzjjBAvpgDbiDrIs_Q,23549
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=rpWo7poxbNYJHPeV2tfiodJ4jeS6-5nfXV31CuIjjw4,23903
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=8VZJP18eJLabSPP1XBSPDaagUBG6q1ynIiPJy3rE2mc,5344
@@ -273,7 +273,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
273
273
  airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
274
274
  airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
275
275
  airbyte_cdk/sources/streams/http/http.py,sha256=h0bq4arzMeJsR-5HZNfGYXtZhgVvLbW6myi9fuhMayU,28467
276
- airbyte_cdk/sources/streams/http/http_client.py,sha256=WxmoFeoISmRnwp2nzfJ0DeD1-05FhL0z8hIni-o7vV8,23067
276
+ airbyte_cdk/sources/streams/http/http_client.py,sha256=mBv9ck2mUv4kvVozPmHN5gHVLIgZ9e11trUsTjskPiM,23308
277
277
  airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
278
278
  airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
279
279
  airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
@@ -330,8 +330,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
330
330
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=LVc9KbtMeV_z99jWo0Ou8u4l6eBJ0BWNhxj4zrrGKRs,763
331
331
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
332
332
  airbyte_cdk/utils/traced_exception.py,sha256=a6q51tBS3IdtefuOiL1eBwSmnNAXfjFMlMjSIQ_Tl-o,6165
333
- airbyte_cdk-6.8.1rc6.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
334
- airbyte_cdk-6.8.1rc6.dist-info/METADATA,sha256=hIuuXLu8aB31Ru7gvg99UecbsE_uRdvwX8TLSBXVBXg,13522
335
- airbyte_cdk-6.8.1rc6.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
336
- airbyte_cdk-6.8.1rc6.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
337
- airbyte_cdk-6.8.1rc6.dist-info/RECORD,,
333
+ airbyte_cdk-6.8.1rc8.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
334
+ airbyte_cdk-6.8.1rc8.dist-info/METADATA,sha256=cQWW8Fk6YGrxN-sqIW9PKhXR2pKGkURuYFUhsYgP3po,13522
335
+ airbyte_cdk-6.8.1rc8.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
336
+ airbyte_cdk-6.8.1rc8.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
337
+ airbyte_cdk-6.8.1rc8.dist-info/RECORD,,