airbyte-cdk 6.8.1rc9__py3-none-any.whl → 6.8.2.dev1__py3-none-any.whl

This diff shows the changes between two publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (73)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +11 -5
  2. airbyte_cdk/config_observation.py +1 -1
  3. airbyte_cdk/connector_builder/main.py +1 -1
  4. airbyte_cdk/connector_builder/message_grouper.py +10 -10
  5. airbyte_cdk/destinations/destination.py +1 -1
  6. airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
  7. airbyte_cdk/destinations/vector_db_based/writer.py +12 -4
  8. airbyte_cdk/entrypoint.py +7 -6
  9. airbyte_cdk/logger.py +2 -2
  10. airbyte_cdk/sources/abstract_source.py +1 -1
  11. airbyte_cdk/sources/config.py +1 -1
  12. airbyte_cdk/sources/connector_state_manager.py +9 -4
  13. airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
  14. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +6 -1
  15. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +76 -28
  16. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +10 -4
  17. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -17
  18. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +4 -1
  19. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  20. airbyte_cdk/sources/declarative/incremental/__init__.py +3 -0
  21. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +270 -0
  22. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +8 -6
  23. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +9 -0
  24. airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
  25. airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
  26. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +71 -17
  27. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +13 -7
  28. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
  29. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +8 -6
  30. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
  31. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
  32. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
  33. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +5 -2
  34. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  35. airbyte_cdk/sources/declarative/spec/spec.py +1 -1
  36. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +0 -1
  37. airbyte_cdk/sources/embedded/base_integration.py +3 -2
  38. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  39. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +18 -7
  40. airbyte_cdk/sources/file_based/file_types/avro_parser.py +14 -11
  41. airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
  42. airbyte_cdk/sources/file_based/file_types/excel_parser.py +11 -5
  43. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
  44. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
  45. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +6 -3
  46. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
  47. airbyte_cdk/sources/http_logger.py +3 -3
  48. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +5 -2
  49. airbyte_cdk/sources/streams/concurrent/adapters.py +6 -3
  50. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +9 -3
  51. airbyte_cdk/sources/streams/concurrent/cursor.py +10 -1
  52. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
  53. airbyte_cdk/sources/streams/core.py +17 -14
  54. airbyte_cdk/sources/streams/http/http.py +19 -19
  55. airbyte_cdk/sources/streams/http/http_client.py +4 -48
  56. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  57. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +62 -33
  58. airbyte_cdk/sources/utils/record_helper.py +1 -1
  59. airbyte_cdk/sources/utils/schema_helpers.py +1 -1
  60. airbyte_cdk/sources/utils/transform.py +34 -15
  61. airbyte_cdk/test/entrypoint_wrapper.py +11 -6
  62. airbyte_cdk/test/mock_http/response_builder.py +1 -1
  63. airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
  64. airbyte_cdk/utils/event_timing.py +10 -10
  65. airbyte_cdk/utils/message_utils.py +4 -3
  66. airbyte_cdk/utils/spec_schema_transformations.py +3 -2
  67. airbyte_cdk/utils/traced_exception.py +14 -12
  68. airbyte_cdk-6.8.2.dev1.dist-info/METADATA +111 -0
  69. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/RECORD +72 -71
  70. airbyte_cdk-6.8.1rc9.dist-info/METADATA +0 -307
  71. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/LICENSE.txt +0 -0
  72. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/WHEEL +0 -0
  73. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/cli/source_declarative_manifest/_run.py CHANGED
@@ -25,7 +25,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, cast
 
-from orjson import orjson
+import orjson
 
 from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
 from airbyte_cdk.models import (
@@ -72,7 +72,7 @@ class SourceLocalYaml(YamlDeclarativeSource):
         super().__init__(
             catalog=catalog,
             config=config,
-            state=state,
+            state=state,  # type: ignore [arg-type]
             path_to_yaml="manifest.yaml",
         )
 
@@ -152,7 +152,9 @@ def handle_remote_manifest_command(args: list[str]) -> None:
     )
 
 
-def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource:
+def create_declarative_source(
+    args: list[str],
+) -> ConcurrentDeclarativeSource:  # type: ignore [type-arg]
     """Creates the source with the injected config.
 
     This essentially does what other low-code sources do at build time, but at runtime,
@@ -160,10 +162,14 @@ def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource:
     connector builder.
     """
     try:
+        config: Mapping[str, Any] | None
+        catalog: ConfiguredAirbyteCatalog | None
+        state: list[AirbyteStateMessage]
         config, catalog, state = _parse_inputs_into_config_catalog_state(args)
-        if "__injected_declarative_manifest" not in config:
+        if config is None or "__injected_declarative_manifest" not in config:
             raise ValueError(
-                f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}"
+                "Invalid config: `__injected_declarative_manifest` should be provided at the root "
+                f"of the config but config only has keys: {list(config.keys() if config else [])}"
             )
         return ConcurrentDeclarativeSource(
             config=config,
airbyte_cdk/config_observation.py CHANGED
@@ -10,7 +10,7 @@ import time
 from copy import copy
 from typing import Any, List, MutableMapping
 
-from orjson import orjson
+import orjson
 
 from airbyte_cdk.models import (
     AirbyteControlConnectorConfigMessage,
airbyte_cdk/connector_builder/main.py CHANGED
@@ -6,7 +6,7 @@
 import sys
 from typing import Any, List, Mapping, Optional, Tuple
 
-from orjson import orjson
+import orjson
 
 from airbyte_cdk.connector import BaseConnector
 from airbyte_cdk.connector_builder.connector_builder_handler import (
airbyte_cdk/connector_builder/message_grouper.py CHANGED
@@ -71,7 +71,7 @@ class MessageGrouper:
 
         is_nested_key = isinstance(field[0], str)
         if is_nested_key:
-            return [field]  # type: ignore  # the type of field is expected to be List[str] here
+            return [field]
 
         raise ValueError(f"Unknown type for cursor field `{field}")
 
@@ -232,9 +232,9 @@ class MessageGrouper:
                 current_slice_descriptor = self._parse_slice_description(message.log.message)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
                 current_slice_pages = []
                 at_least_one_page_in_group = False
-            elif message.type == MessageType.LOG and message.log.message.startswith(
+            elif message.type == MessageType.LOG and message.log.message.startswith(  # type: ignore[union-attr] # None doesn't have 'message'
                 SliceLogger.SLICE_LOG_PREFIX
-            ):  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
+            ):
                 # parsing the first slice
                 current_slice_descriptor = self._parse_slice_description(message.log.message)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
             elif message.type == MessageType.LOG:
@@ -274,14 +274,14 @@ class MessageGrouper:
                 if message.trace.type == TraceType.ERROR:  # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has trace.type
                     yield message.trace
             elif message.type == MessageType.RECORD:
-                current_page_records.append(message.record.data)  # type: ignore[union-attr] # AirbyteMessage with MessageType.RECORD has record.data
+                current_page_records.append(message.record.data)  # type: ignore[arg-type, union-attr] # AirbyteMessage with MessageType.RECORD has record.data
                 records_count += 1
                 schema_inferrer.accumulate(message.record)
                 datetime_format_inferrer.accumulate(message.record)
             elif (
                 message.type == MessageType.CONTROL
-                and message.control.type == OrchestratorType.CONNECTOR_CONFIG
-            ):  # type: ignore[union-attr] # AirbyteMessage with MessageType.CONTROL has control.type
+                and message.control.type == OrchestratorType.CONNECTOR_CONFIG  # type: ignore[union-attr] # None doesn't have 'type'
+            ):
                 yield message.control
             elif message.type == MessageType.STATE:
                 latest_state_message = message.state  # type: ignore[assignment]
@@ -310,8 +310,8 @@ class MessageGrouper:
            and message.type == MessageType.LOG
            and (
                MessageGrouper._is_page_http_request(json_message)
-                or message.log.message.startswith("slice:")
-            )  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
+                or message.log.message.startswith("slice:")  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
+            )
        )
 
    @staticmethod
@@ -355,8 +355,8 @@ class MessageGrouper:
                StreamReadPages(
                    request=current_page_request,
                    response=current_page_response,
-                    records=deepcopy(current_page_records),
-                )  # type: ignore
+                    records=deepcopy(current_page_records),  # type: ignore [arg-type]
+                )
            )
        current_page_records.clear()
 
airbyte_cdk/destinations/destination.py CHANGED
@@ -9,7 +9,7 @@ import sys
 from abc import ABC, abstractmethod
 from typing import Any, Iterable, List, Mapping
 
-from orjson import orjson
+import orjson
 
 from airbyte_cdk.connector import Connector
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
airbyte_cdk/destinations/vector_db_based/embedder.py CHANGED
@@ -107,7 +107,7 @@ class BaseOpenAIEmbedder(Embedder):
 class OpenAIEmbedder(BaseOpenAIEmbedder):
     def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int):
         super().__init__(
-            OpenAIEmbeddings(
+            OpenAIEmbeddings(  # type: ignore [call-arg]
                 openai_api_key=config.openai_key, max_retries=15, disallowed_special=()
             ),
             chunk_size,
@@ -118,7 +118,7 @@ class AzureOpenAIEmbedder(BaseOpenAIEmbedder):
     def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int):
         # Azure OpenAI API has — as of 20230927 — a limit of 16 documents per request
         super().__init__(
-            OpenAIEmbeddings(
+            OpenAIEmbeddings(  # type: ignore [call-arg]
                 openai_api_key=config.openai_key,
                 chunk_size=16,
                 max_retries=15,
airbyte_cdk/destinations/vector_db_based/writer.py CHANGED
@@ -83,11 +83,19 @@ class Writer:
                 yield message
             elif message.type == Type.RECORD:
                 record_chunks, record_id_to_delete = self.processor.process(message.record)
-                self.chunks[(message.record.namespace, message.record.stream)].extend(record_chunks)
-                if record_id_to_delete is not None:
-                    self.ids_to_delete[(message.record.namespace, message.record.stream)].append(
-                        record_id_to_delete
+                self.chunks[
+                    (  # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]"
+                        message.record.namespace,  # type: ignore [union-attr] # record not None
+                        message.record.stream,  # type: ignore [union-attr] # record not None
                     )
+                ].extend(record_chunks)
+                if record_id_to_delete is not None:
+                    self.ids_to_delete[
+                        (  # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]"
+                            message.record.namespace,  # type: ignore [union-attr] # record not None
+                            message.record.stream,  # type: ignore [union-attr] # record not None
+                        )
+                    ].append(record_id_to_delete)
                 self.number_of_chunks += len(record_chunks)
                 if self.number_of_chunks >= self.batch_size:
                     self._process_batch()
airbyte_cdk/entrypoint.py CHANGED
@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
 from airbyte_cdk.connector import TConfig
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
 from airbyte_cdk.logger import init_logger
-from airbyte_cdk.models import (  # type: ignore [attr-defined]
+from airbyte_cdk.models import (
     AirbyteConnectionStatus,
     AirbyteMessage,
     AirbyteMessageSerializer,
@@ -255,9 +255,10 @@ class AirbyteEntrypoint(object):
 
                 stream_message_count[
                     HashableStreamDescriptor(
-                        name=message.record.stream, namespace=message.record.namespace
+                        name=message.record.stream,  # type: ignore[union-attr] # record has `stream`
+                        namespace=message.record.namespace,  # type: ignore[union-attr] # record has `namespace`
                     )
-                ] += 1.0  # type: ignore[union-attr] # record has `stream` and `namespace`
+                ] += 1.0
             case Type.STATE:
                 if message.state is None:
                     raise ValueError("State message must have a state attribute")
@@ -266,9 +267,9 @@ class AirbyteEntrypoint(object):
 
                 # Set record count from the counter onto the state message
                 message.state.sourceStats = message.state.sourceStats or AirbyteStateStats()  # type: ignore[union-attr] # state has `sourceStats`
-                message.state.sourceStats.recordCount = stream_message_count.get(
+                message.state.sourceStats.recordCount = stream_message_count.get(  # type: ignore[union-attr] # state has `sourceStats`
                     stream_descriptor, 0.0
-                )  # type: ignore[union-attr] # state has `sourceStats`
+                )
 
                 # Reset the counter
                 stream_message_count[stream_descriptor] = 0.0
@@ -290,7 +291,7 @@ class AirbyteEntrypoint(object):
 
     @staticmethod
     def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
-        return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode()  # type: ignore[no-any-return] # orjson.dumps(message).decode() always returns string
+        return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode()
 
     @classmethod
     def extract_state(cls, args: List[str]) -> Optional[Any]:
airbyte_cdk/logger.py CHANGED
@@ -7,7 +7,7 @@ import logging
 import logging.config
 from typing import Any, Callable, Mapping, Optional, Tuple
 
-from orjson import orjson
+import orjson
 
 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -78,7 +78,7 @@ class AirbyteLogFormatter(logging.Formatter):
         log_message = AirbyteMessage(
             type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message)
         )
-        return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()  # type: ignore[no-any-return] # orjson.dumps(message).decode() always returns string
+        return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()
 
     @staticmethod
     def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, Any]:
airbyte_cdk/sources/abstract_source.py CHANGED
@@ -200,7 +200,7 @@ class AbstractSource(Source, ABC):
         if len(stream_name_to_exception) > 0:
             error_message = generate_failed_streams_error_message(
                 {key: [value] for key, value in stream_name_to_exception.items()}
-            )  # type: ignore # for some reason, mypy can't figure out the types for key and value
+            )
             logger.info(error_message)
             # We still raise at least one exception when a stream raises an exception because the platform currently relies
             # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
airbyte_cdk/sources/config.py CHANGED
@@ -24,4 +24,4 @@ class BaseConfig(BaseModel):
         rename_key(schema, old_key="anyOf", new_key="oneOf")  # UI supports only oneOf
         expand_refs(schema)
         schema.pop("description", None)  # description added from the docstring
-        return schema  # type: ignore[no-any-return]
+        return schema
airbyte_cdk/sources/connector_state_manager.py CHANGED
@@ -4,7 +4,7 @@
 
 import copy
 from dataclasses import dataclass
-from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
+from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union, cast
 
 from airbyte_cdk.models import (
     AirbyteMessage,
@@ -15,6 +15,7 @@ from airbyte_cdk.models import (
     StreamDescriptor,
 )
 from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.models.airbyte_protocol import AirbyteGlobalState, AirbyteStateBlob
 
 
 @dataclass(frozen=True)
@@ -118,8 +119,12 @@ class ConnectorStateManager:
         is_global = cls._is_global_state(state)
 
         if is_global:
-            global_state = state[0].global_  # type: ignore # We verified state is a list in _is_global_state
-            shared_state = copy.deepcopy(global_state.shared_state, {})  # type: ignore[union-attr] # global_state has shared_state
+            # We already validate that this is a global state message, not None:
+            global_state = cast(AirbyteGlobalState, state[0].global_)
+            # global_state has shared_state, also not None:
+            shared_state: AirbyteStateBlob = cast(
+                AirbyteStateBlob, copy.deepcopy(global_state.shared_state, {})
+            )
             streams = {
                 HashableStreamDescriptor(
                     name=per_stream_state.stream_descriptor.name,
@@ -131,7 +136,7 @@ class ConnectorStateManager:
         else:
             streams = {
                 HashableStreamDescriptor(
-                    name=per_stream_state.stream.stream_descriptor.name,
+                    name=per_stream_state.stream.stream_descriptor.name,  # type: ignore[union-attr] # stream has stream_descriptor
                     namespace=per_stream_state.stream.stream_descriptor.namespace,  # type: ignore[union-attr] # stream has stream_descriptor
                 ): per_stream_state.stream.stream_state  # type: ignore[union-attr] # stream has stream_state
                 for per_stream_state in state
airbyte_cdk/sources/declarative/auth/oauth.py CHANGED
@@ -135,7 +135,7 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
         return self.grant_type.eval(self.config)  # type: ignore # eval returns a string in this context
 
     def get_refresh_request_body(self) -> Mapping[str, Any]:
-        return self._refresh_request_body.eval(self.config)  # type: ignore # eval should return a Mapping in this context
+        return self._refresh_request_body.eval(self.config)
 
     def get_token_expiry_date(self) -> pendulum.DateTime:
         return self._token_expiry_date  # type: ignore # _token_expiry_date is a pendulum.DateTime. It is never None despite what mypy thinks
airbyte_cdk/sources/declarative/auth/selective_authenticator.py CHANGED
@@ -28,7 +28,12 @@ class SelectiveAuthenticator(DeclarativeAuthenticator):
         **kwargs: Any,
     ) -> DeclarativeAuthenticator:
         try:
-            selected_key = str(dpath.get(config, authenticator_selection_path))
+            selected_key = str(
+                dpath.get(
+                    config,  # type: ignore [arg-type] # Dpath wants mutable mapping but doesn't need it.
+                    authenticator_selection_path,
+                )
+            )
         except KeyError as err:
             raise ValueError(
                 "The path from `authenticator_selection_path` is not found in the config."
airbyte_cdk/sources/declarative/concurrent_declarative_source.py CHANGED
@@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
+from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
+    PerPartitionWithGlobalCursor,
+)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -56,9 +59,8 @@ from airbyte_cdk.sources.types import Config, StreamState
 
 
 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
-    # By default, we defer to a value of 2. A value lower than than could cause a PartitionEnqueuer to be stuck in a state of deadlock
-    # because it has hit the limit of futures but not partition reader is consuming them.
-    SINGLE_THREADED_CONCURRENCY_LEVEL = 2
+    # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
+    SINGLE_THREADED_CONCURRENCY_LEVEL = 1
 
     def __init__(
         self,
@@ -79,9 +81,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             emit_connector_builder_messages=emit_connector_builder_messages,
             disable_resumable_full_refresh=True,
         )
-        self._config = config
-        self._concurrent_streams: Optional[List[AbstractStream]] = None
-        self._synchronous_streams: Optional[List[Stream]] = None
 
         super().__init__(
             source_config=source_config,
@@ -92,6 +91,21 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
 
         self._state = state
 
+        self._concurrent_streams: Optional[List[AbstractStream]]
+        self._synchronous_streams: Optional[List[Stream]]
+
+        # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
+        # they might depend on it. Ideally we want to have a static method on this class to get the spec without
+        # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
+        # for our future improvements to the CDK.
+        if config:
+            self._concurrent_streams, self._synchronous_streams = self._group_streams(
+                config=config or {}
+            )
+        else:
+            self._concurrent_streams = None
+            self._synchronous_streams = None
+
         concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         if concurrency_level_from_manifest:
             concurrency_level_component = self._constructor.create_component(
@@ -110,29 +124,16 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             )  # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
         else:
             concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
-            initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL // 2
+            initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
 
         self._concurrent_source = ConcurrentSource.create(
             num_workers=concurrency_level,
             initial_number_of_partitions_to_generate=initial_number_of_partitions_to_generate,
             logger=self.logger,
             slice_logger=self._slice_logger,
-            message_repository=self.message_repository,  # type: ignore # message_repository is always instantiated with a value by factory
+            message_repository=self.message_repository,
         )
 
-    def _actually_group(self) -> None:
-        # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
-        # they might depend on it. Ideally we want to have a static method on this class to get the spec without
-        # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
-        # for our future improvements to the CDK.
-        if self._config:
-            self._concurrent_streams, self._synchronous_streams = self._group_streams(
-                config=self._config or {}
-            )
-        else:
-            self._concurrent_streams = None
-            self._synchronous_streams = None
-
     def read(
         self,
         logger: logging.Logger,
@@ -142,9 +143,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
     ) -> Iterator[AirbyteMessage]:
         # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
         # streams must be saved so that they can be removed from the catalog before starting synchronous streams
-        if self._concurrent_streams is None:
-            self._actually_group()
-
         if self._concurrent_streams:
             concurrent_stream_names = set(
                 [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
@@ -170,9 +168,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         yield from super().read(logger, config, filtered_catalog, state)
 
     def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
-        if self._concurrent_streams is None:
-            self._actually_group()
-
         concurrent_streams = self._concurrent_streams or []
         synchronous_streams = self._synchronous_streams or []
         return AirbyteCatalog(
@@ -201,7 +196,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         state_manager = ConnectorStateManager(state=self._state)  # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
 
         name_to_stream_mapping = {
-            stream["name"]: stream for stream in self._initialize_cache_for_parent_streams(self.resolved_manifest["streams"])
+            stream["name"]: stream for stream in self.resolved_manifest["streams"]
         }
 
         for declarative_stream in self.streams(config=config):
@@ -314,6 +309,59 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                             cursor=final_state_cursor,
                         )
                     )
+                elif (
+                    incremental_sync_component_definition
+                    and incremental_sync_component_definition.get("type", "")
+                    == DatetimeBasedCursorModel.__name__
+                    and self._stream_supports_concurrent_partition_processing(
+                        declarative_stream=declarative_stream
+                    )
+                    and hasattr(declarative_stream.retriever, "stream_slicer")
+                    and isinstance(declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor)
+                ):
+                    stream_state = state_manager.get_stream_state(
+                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
+                    )
+                    partition_router = declarative_stream.retriever.stream_slicer._partition_router
+
+                    cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
+                        state_manager=state_manager,
+                        model_type=DatetimeBasedCursorModel,
+                        component_definition=incremental_sync_component_definition,
+                        stream_name=declarative_stream.name,
+                        stream_namespace=declarative_stream.namespace,
+                        config=config or {},
+                        stream_state=stream_state,
+                        partition_router=partition_router,
+                    )
+
+
+                    partition_generator = StreamSlicerPartitionGenerator(
+                        DeclarativePartitionFactory(
+                            declarative_stream.name,
+                            declarative_stream.get_json_schema(),
+                            self._retriever_factory(
+                                name_to_stream_mapping[declarative_stream.name],
+                                config,
+                                stream_state,
+                            ),
+                            self.message_repository,
+                        ),
+                        cursor,
+                    )
+
+                    concurrent_streams.append(
+                        DefaultStream(
+                            partition_generator=partition_generator,
+                            name=declarative_stream.name,
+                            json_schema=declarative_stream.get_json_schema(),
+                            availability_strategy=AlwaysAvailableAvailabilityStrategy(),
+                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
+                            cursor_field=cursor.cursor_field.cursor_field_key,
+                            logger=self.logger,
+                            cursor=cursor,
+                        )
+                    )
                 else:
                     synchronous_streams.append(declarative_stream)
             else:
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py CHANGED
@@ -41,12 +41,12 @@ class MinMaxDatetime:
         self.datetime = InterpolatedString.create(self.datetime, parameters=parameters or {})
         self._parser = DatetimeParser()
         self.min_datetime = (
-            InterpolatedString.create(self.min_datetime, parameters=parameters)
+            InterpolatedString.create(self.min_datetime, parameters=parameters)  # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str"
             if self.min_datetime
             else None
         )  # type: ignore
         self.max_datetime = (
-            InterpolatedString.create(self.max_datetime, parameters=parameters)
+            InterpolatedString.create(self.max_datetime, parameters=parameters)  # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str"
             if self.max_datetime
             else None
         )  # type: ignore
@@ -66,7 +66,13 @@ class MinMaxDatetime:
             datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"
 
         time = self._parser.parse(
-            str(self.datetime.eval(config, **additional_parameters)), datetime_format
+            str(
+                self.datetime.eval(  # type: ignore[union-attr] # str has no attribute "eval"
+                    config,
+                    **additional_parameters,
+                )
+            ),
+            datetime_format,
         )  # type: ignore # datetime is always cast to an interpolated string
 
         if self.min_datetime:
@@ -105,7 +111,7 @@ class MinMaxDatetime:
         if isinstance(interpolated_string_or_min_max_datetime, InterpolatedString) or isinstance(
             interpolated_string_or_min_max_datetime, str
         ):
-            return MinMaxDatetime(
+            return MinMaxDatetime(  # type: ignore [call-arg]
                 datetime=interpolated_string_or_min_max_datetime, parameters=parameters
             )
         else:
airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED
@@ -2057,7 +2057,7 @@ definitions:
           The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.
           The placeholders are replaced during the processing to provide neccessary values.
         examples:
-          - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
+          - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
       access_token_headers:
         title: (Optional) DeclarativeOAuth Access Token Headers
        type: object
@@ -2065,9 +2065,10 @@ definitions:
        description: |-
          The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
        examples:
-          - access_token_headers: {
-              "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"
-            }
+          - access_token_headers:
+              {
+                "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}",
+              }
      access_token_params:
        title: (Optional) DeclarativeOAuth Access Token Query Params (Json Encoded)
        type: object
@@ -2076,18 +2077,19 @@ definitions:
        description: |-
          The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
          When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.
        examples:
-          - access_token_params: {
-              "{auth_code_key}": "{{auth_code_key}}",
-              "{client_id_key}": "{{client_id_key}}",
-              "{client_secret_key}": "{{client_secret_key}}"
-            }
+          - access_token_params:
+              {
+                "{auth_code_key}": "{{auth_code_key}}",
+                "{client_id_key}": "{{client_id_key}}",
+                "{client_secret_key}": "{{client_secret_key}}",
+              }
      extract_output:
        title: DeclarativeOAuth Extract Output
        type: array
        items:
          type: string
        description: |-
-          The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
+          The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
        examples:
          - extract_output: ["access_token", "refresh_token", "other_field"]
      state:
@@ -2099,17 +2101,14 @@ definitions:
          - max
        description: |-
          The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,
-          including length and complexity.
+          including length and complexity.
        properties:
          min:
            type: integer
          max:
            type: integer
        examples:
-          - state: {
-              "min": 7,
-              "max": 128,
-            }
+          - state: { "min": 7, "max": 128 }
      client_id_key:
        title: (Optional) DeclarativeOAuth Client ID Key Override
        type: string
@@ -2135,14 +2134,14 @@ definitions:
        title: (Optional) DeclarativeOAuth State Key Override
        type: string
        description: |-
-          The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
+          The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
        examples:
          - state_key: "my_custom_state_key_key_name"
      auth_code_key:
        title: (Optional) DeclarativeOAuth Auth Code Key Override
        type: string
        description: |-
-          The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
+          The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
        examples:
          - auth_code_key: "my_custom_auth_code_key_name"
      redirect_uri_key:
airbyte_cdk/sources/declarative/decoders/noop_decoder.py CHANGED
@@ -14,5 +14,8 @@ class NoopDecoder(Decoder):
     def is_stream_response(self) -> bool:
         return False
 
-    def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]:
+    def decode(  # type: ignore[override] # Signature doesn't match base class
+        self,
+        response: requests.Response,
+    ) -> Generator[Mapping[str, Any], None, None]:
         yield from [{}]