airbyte-cdk 6.8.1rc9__py3-none-any.whl → 6.8.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +11 -5
  2. airbyte_cdk/config_observation.py +1 -1
  3. airbyte_cdk/connector_builder/main.py +1 -1
  4. airbyte_cdk/connector_builder/message_grouper.py +10 -10
  5. airbyte_cdk/destinations/destination.py +1 -1
  6. airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
  7. airbyte_cdk/destinations/vector_db_based/writer.py +12 -4
  8. airbyte_cdk/entrypoint.py +7 -6
  9. airbyte_cdk/logger.py +2 -2
  10. airbyte_cdk/sources/abstract_source.py +1 -1
  11. airbyte_cdk/sources/config.py +1 -1
  12. airbyte_cdk/sources/connector_state_manager.py +9 -4
  13. airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
  14. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +6 -1
  15. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +76 -28
  16. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +10 -4
  17. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -17
  18. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +4 -1
  19. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  20. airbyte_cdk/sources/declarative/incremental/__init__.py +3 -0
  21. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +270 -0
  22. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +8 -6
  23. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +9 -0
  24. airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
  25. airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
  26. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +71 -17
  27. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +13 -7
  28. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
  29. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +8 -6
  30. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
  31. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
  32. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
  33. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +5 -2
  34. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  35. airbyte_cdk/sources/declarative/spec/spec.py +1 -1
  36. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +0 -1
  37. airbyte_cdk/sources/embedded/base_integration.py +3 -2
  38. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  39. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +18 -7
  40. airbyte_cdk/sources/file_based/file_types/avro_parser.py +14 -11
  41. airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
  42. airbyte_cdk/sources/file_based/file_types/excel_parser.py +11 -5
  43. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
  44. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
  45. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +6 -3
  46. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
  47. airbyte_cdk/sources/http_logger.py +3 -3
  48. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +5 -2
  49. airbyte_cdk/sources/streams/concurrent/adapters.py +6 -3
  50. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +9 -3
  51. airbyte_cdk/sources/streams/concurrent/cursor.py +10 -1
  52. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
  53. airbyte_cdk/sources/streams/core.py +17 -14
  54. airbyte_cdk/sources/streams/http/http.py +19 -19
  55. airbyte_cdk/sources/streams/http/http_client.py +4 -48
  56. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  57. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +62 -33
  58. airbyte_cdk/sources/utils/record_helper.py +1 -1
  59. airbyte_cdk/sources/utils/schema_helpers.py +1 -1
  60. airbyte_cdk/sources/utils/transform.py +34 -15
  61. airbyte_cdk/test/entrypoint_wrapper.py +11 -6
  62. airbyte_cdk/test/mock_http/response_builder.py +1 -1
  63. airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
  64. airbyte_cdk/utils/event_timing.py +10 -10
  65. airbyte_cdk/utils/message_utils.py +4 -3
  66. airbyte_cdk/utils/spec_schema_transformations.py +3 -2
  67. airbyte_cdk/utils/traced_exception.py +14 -12
  68. airbyte_cdk-6.8.2.dev1.dist-info/METADATA +111 -0
  69. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/RECORD +72 -71
  70. airbyte_cdk-6.8.1rc9.dist-info/METADATA +0 -307
  71. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/LICENSE.txt +0 -0
  72. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/WHEEL +0 -0
  73. {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/entry_points.txt +0 -0
@@ -130,11 +130,11 @@ class SubstreamPartitionRouter(PartitionRouter):
                     if value:
                         params.update(
                             {
-                                parent_config.request_option.field_name.eval(
+                                parent_config.request_option.field_name.eval(  # type: ignore [union-attr]
                                     config=self.config
                                 ): value
                             }
-                        )  # type: ignore # field_name is always casted to an interpolated string
+                        )
         return params
 
     def stream_slices(self) -> Iterable[StreamSlice]:
@@ -162,9 +162,9 @@ class SubstreamPartitionRouter(PartitionRouter):
             extra_fields = None
             if parent_stream_config.extra_fields:
                 extra_fields = [
-                    [field_path_part.eval(self.config) for field_path_part in field_path]
+                    [field_path_part.eval(self.config) for field_path_part in field_path]  # type: ignore [union-attr]
                     for field_path in parent_stream_config.extra_fields
-                ]  # type: ignore # extra_fields is always casted to an interpolated string
+                ]
 
             # read_stateless() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does
             # not support either substreams or RFR, but something that needs to be considered once we do
@@ -192,7 +192,10 @@ class SubstreamPartitionRouter(PartitionRouter):
                         message=f"Parent stream returned records as invalid type {type(parent_record)}"
                     )
                 try:
-                    partition_value = dpath.get(parent_record, parent_field)
+                    partition_value = dpath.get(
+                        parent_record,  # type: ignore [arg-type]
+                        parent_field,
+                    )
                 except KeyError:
                     continue
 
@@ -228,7 +231,10 @@ class SubstreamPartitionRouter(PartitionRouter):
         if extra_fields:
             for extra_field_path in extra_fields:
                 try:
-                    extra_field_value = dpath.get(parent_record, extra_field_path)
+                    extra_field_value = dpath.get(
+                        parent_record,  # type: ignore [arg-type]
+                        extra_field_path,
+                    )
                     self.logger.debug(
                         f"Extracted extra_field_path: {extra_field_path} with value: {extra_field_value}"
                     )
@@ -291,7 +297,7 @@ class SubstreamPartitionRouter(PartitionRouter):
             if not parent_state and incremental_dependency:
                 # Attempt to retrieve child state
                 substream_state = list(stream_state.values())
-                substream_state = substream_state[0] if substream_state else {}
+                substream_state = substream_state[0] if substream_state else {}  # type: ignore [assignment]  # Incorrect type for assignment
                 parent_state = {}
 
                 # Copy child state to parent streams with incremental dependencies
@@ -141,7 +141,7 @@ class DefaultErrorHandler(ErrorHandler):
             for backoff_strategy in self.backoff_strategies:
                 backoff = backoff_strategy.backoff_time(
                     response_or_exception=response_or_exception, attempt_count=attempt_count
-                )  # type: ignore # attempt_count maintained for compatibility with low code CDK
+                )
                 if backoff:
                     return backoff
         return backoff
@@ -151,21 +151,23 @@ class HttpResponseFilter:
         :param response: The HTTP response which can be used during interpolation
         :return: The evaluated error message string to be emitted
         """
-        return self.error_message.eval(
+        return self.error_message.eval(  # type: ignore [no-any-return, union-attr]
             self.config, response=self._safe_response_json(response), headers=response.headers
-        )  # type: ignore # error_message is always cast to an interpolated string
+        )
 
     def _response_matches_predicate(self, response: requests.Response) -> bool:
         return (
             bool(
-                self.predicate.condition
-                and self.predicate.eval(
-                    None, response=self._safe_response_json(response), headers=response.headers
+                self.predicate.condition  # type: ignore [union-attr]
+                and self.predicate.eval(  # type: ignore [union-attr]
+                    None,  # type: ignore [arg-type]
+                    response=self._safe_response_json(response),
+                    headers=response.headers,
                 )
             )
             if self.predicate
             else False
-        )  # type: ignore # predicate is always cast to an interpolated string
+        )
 
     def _response_contains_error_message(self, response: requests.Response) -> bool:
         if not self.error_message_contains:
@@ -194,7 +194,7 @@ class DefaultPaginator(Paginator):
             and self.pagination_strategy.get_page_size()
             and self.page_size_option.inject_into == option_type
         ):
-            options[self.page_size_option.field_name.eval(config=self.config)] = (
+            options[self.page_size_option.field_name.eval(config=self.config)] = (  # type: ignore [union-attr]
                 self.pagination_strategy.get_page_size()
             )  # type: ignore # field_name is always cast to an interpolated string
         return options
@@ -85,7 +85,7 @@ class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider):
                 self._partition_field_start.eval(self.config)
             )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
                 self._partition_field_end.eval(self.config)
-            )  # type: ignore # field_name is always casted to an interpolated string
+            )
         return options
@@ -5,7 +5,7 @@
 from dataclasses import InitVar, dataclass, field
 from typing import Any, Mapping, MutableMapping, Optional, Union
 
-from deprecated import deprecated
+from typing_extensions import deprecated
 
 from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
 from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
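Several of the hunks in this diff swap the runtime-only `deprecated` package for the PEP 702 `deprecated` decorator from `typing_extensions`, which type checkers understand and which still emits a warning at runtime. A minimal sketch of the decorator's behavior (the `OldClient` class below is hypothetical, not part of the CDK):

import warnings

from typing_extensions import deprecated


@deprecated("Use NewClient instead.")
class OldClient:
    """Hypothetical class used only to demonstrate the decorator."""


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    OldClient()  # instantiating the decorated class triggers a DeprecationWarning
    print(caught[0].category.__name__, caught[0].message)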
@@ -4,7 +4,7 @@
 from dataclasses import InitVar, dataclass, field
 from typing import Any, Callable, Iterable, Mapping, Optional
 
-from deprecated.classic import deprecated
+from typing_extensions import deprecated
 
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
@@ -21,7 +21,10 @@ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 
 
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 @dataclass
 class AsyncRetriever(Retriever):
     config: Config
@@ -178,7 +178,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.stream_slicer.get_request_headers,
+            self.request_option_provider.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
@@ -9,7 +9,7 @@ from airbyte_cdk.models import (
     AdvancedAuth,
     ConnectorSpecification,
     ConnectorSpecificationSerializer,
-)  # type: ignore [attr-defined]
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow
 
 
@@ -38,7 +38,6 @@ class DeclarativePartitionFactory:
             stream_slice,
         )
 
-
 class DeclarativePartition(Partition):
     def __init__(
         self,
@@ -52,8 +52,9 @@ class BaseEmbeddedIntegration(ABC, Generic[TConfig, TOutput]):
         for message in self.source.read(self.config, configured_catalog, state):
             if message.type == Type.RECORD:
                 output = self._handle_record(
-                    message.record, get_defined_id(stream, message.record.data)
-                )  # type: ignore[union-attr] # record has `data`
+                    message.record,
+                    get_defined_id(stream, message.record.data),  # type: ignore[union-attr, arg-type]
+                )
                 if output:
                     yield output
             elif message.type is Type.STATE and message.state:
@@ -2,6 +2,8 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 
+from __future__ import annotations
+
 import logging
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Optional, Tuple
@@ -22,8 +24,11 @@ if TYPE_CHECKING:
 
 class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
     @abstractmethod
-    def check_availability(
-        self, stream: Stream, logger: logging.Logger, _: Optional[Source]
+    def check_availability(  # type: ignore[override]  # Signature doesn't match base class
+        self,
+        stream: Stream,
+        logger: logging.Logger,
+        _: Optional[Source],
     ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream.
@@ -34,7 +39,10 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
 
     @abstractmethod
     def check_availability_and_parsability(
-        self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
+        self,
+        stream: AbstractFileBasedStream,
+        logger: logging.Logger,
+        _: Optional[Source],
     ) -> Tuple[bool, Optional[str]]:
         """
         Performs a connection check for the stream, as well as additional checks that
@@ -46,7 +54,7 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
 
 
 class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy):
-    def __init__(self, stream: "AbstractFileBasedStream"):
+    def __init__(self, stream: AbstractFileBasedStream) -> None:
         self.stream = stream
 
     def check_availability(self, logger: logging.Logger) -> StreamAvailability:
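The availability-strategy hunks above add `from __future__ import annotations` and then drop the string quotes around forward references such as "AbstractFileBasedStream". A minimal sketch of that pattern, using illustrative module and class names rather than the CDK's own:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported for type checkers only; never imported at runtime.
    from some_package.streams import ExampleStream


class ExampleWrapper:
    # With postponed evaluation of annotations, no quotes are needed here.
    def __init__(self, stream: ExampleStream) -> None:
        self.stream = stream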
@@ -2,6 +2,8 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 
+from __future__ import annotations
+
 import logging
 import traceback
 from typing import TYPE_CHECKING, Optional, Tuple
@@ -25,12 +27,15 @@ if TYPE_CHECKING:
 
 
 class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy):
-    def __init__(self, stream_reader: AbstractFileBasedStreamReader):
+    def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None:
         self.stream_reader = stream_reader
 
-    def check_availability(
-        self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
-    ) -> Tuple[bool, Optional[str]]:  # type: ignore[override]
+    def check_availability(  # type: ignore[override]  # Signature doesn't match base class
+        self,
+        stream: AbstractFileBasedStream,
+        logger: logging.Logger,
+        _: Optional[Source],
+    ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream (verify that we can list files from the stream).
 
@@ -44,7 +49,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         return True, None
 
     def check_availability_and_parsability(
-        self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
+        self,
+        stream: AbstractFileBasedStream,
+        logger: logging.Logger,
+        _: Optional[Source],
     ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream.
@@ -82,7 +90,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
 
         return True, None
 
-    def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile:
+    def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile:
         """
         Check that we can list files from the stream.
 
@@ -102,7 +110,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         return file
 
     def _check_parse_record(
-        self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger
+        self,
+        stream: AbstractFileBasedStream,
+        file: RemoteFile,
+        logger: logging.Logger,
     ) -> None:
         parser = stream.get_parser()
 
@@ -3,7 +3,7 @@
 #
 
 import logging
-from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
+from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, cast
 
 import fastavro
 
@@ -64,18 +64,20 @@ class AvroParser(FileTypeParser):
             raise ValueError(f"Expected ParquetFormat, got {avro_format}")
 
         with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
-            avro_reader = fastavro.reader(fp)
+            avro_reader = fastavro.reader(fp)  # type: ignore [arg-type]
             avro_schema = avro_reader.writer_schema
-        if not avro_schema["type"] == "record":
-            unsupported_type = avro_schema["type"]
+        if not avro_schema["type"] == "record":  # type: ignore [index, call-overload]
+            unsupported_type = avro_schema["type"]  # type: ignore [index, call-overload]
             raise ValueError(
                 f"Only record based avro files are supported. Found {unsupported_type}"
             )
         json_schema = {
-            field["name"]: AvroParser._convert_avro_type_to_json(
-                avro_format, field["name"], field["type"]
+            field["name"]: AvroParser._convert_avro_type_to_json(  # type: ignore [index]
+                avro_format,
+                field["name"],  # type: ignore [index]
+                field["type"],  # type: ignore [index]
             )
-            for field in avro_schema["fields"]
+            for field in avro_schema["fields"]  # type: ignore [index, call-overload]
         }
         return json_schema
 
@@ -180,18 +182,19 @@ class AvroParser(FileTypeParser):
         line_no = 0
         try:
             with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
-                avro_reader = fastavro.reader(fp)
+                avro_reader = fastavro.reader(fp)  # type: ignore [arg-type]
                 schema = avro_reader.writer_schema
                 schema_field_name_to_type = {
-                    field["name"]: field["type"] for field in schema["fields"]
+                    field["name"]: cast(dict[str, Any], field["type"])  # type: ignore [index]
+                    for field in schema["fields"]  # type: ignore [index, call-overload] # If schema is not dict, it is not subscriptable by strings
                 }
                 for record in avro_reader:
                     line_no += 1
                     yield {
                         record_field: self._to_output_value(
                             avro_format,
-                            schema_field_name_to_type[record_field],
-                            record[record_field],
+                            schema_field_name_to_type[record_field],  # type: ignore [index] # Any not subscriptable
+                            record[record_field],  # type: ignore [index] # Any not subscriptable
                         )
                         for record_field, record_value in schema_field_name_to_type.items()
                     }
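The `cast(dict[str, Any], field["type"])` added above tells mypy what fastavro's loosely typed schema entries contain; `cast` has no runtime effect. A small illustration of the same pattern (the sample schema value is made up):

from typing import Any, cast

schema_field: Any = {"type": "string"}  # loosely typed, as fastavro exposes schema parts
field_type = cast(dict[str, Any], schema_field)  # the checker now sees dict[str, Any]; nothing changes at runtime
print(field_type["type"])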
@@ -12,7 +12,7 @@ from io import IOBase
 from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
 from uuid import uuid4
 
-from orjson import orjson
+import orjson
 
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.file_based.config.csv_format import (
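The `from orjson import orjson` form replaced here (and in the excel and jsonl parsers below) appears to have relied on an implementation detail of older orjson releases; the plain `import orjson` shown in the hunk is the documented way to use the library. A short usage sketch:

import orjson

payload = orjson.dumps({"stream": "users", "count": 3})  # dumps returns bytes
print(orjson.loads(payload))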
@@ -117,7 +117,7 @@ class _CsvReader:
         """
         # Note that this method assumes the dialect has already been registered if we're parsing the headers
         if isinstance(config_format.header_definition, CsvHeaderUserProvided):
-            return config_format.header_definition.column_names  # type: ignore # should be CsvHeaderUserProvided given the type
+            return config_format.header_definition.column_names
 
         if isinstance(config_format.header_definition, CsvHeaderAutogenerated):
             self._skip_rows(
@@ -229,7 +229,7 @@ class CsvParser(FileTypeParser):
         if discovered_schema:
             property_types = {
                 col: prop["type"] for col, prop in discovered_schema["properties"].items()
-            }  # type: ignore # discovered_schema["properties"] is known to be a mapping
+            }
             deduped_property_types = CsvParser._pre_propcess_property_types(property_types)
         else:
             deduped_property_types = {}
@@ -7,10 +7,10 @@ from io import IOBase
 from pathlib import Path
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
 
+import orjson
 import pandas as pd
 from numpy import datetime64, issubdtype
 from numpy import dtype as dtype_
-from orjson import orjson
 from pydantic.v1 import BaseModel
 
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
@@ -69,8 +69,11 @@ class ExcelParser(FileTypeParser):
             df = self.open_and_parse_file(fp)
             for column, df_type in df.dtypes.items():
                 # Choose the broadest data type if the column's data type differs in dataframes
-                prev_frame_column_type = fields.get(column)
-                fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type)
+                prev_frame_column_type = fields.get(column)  # type: ignore [call-overload]
+                fields[column] = self.dtype_to_json_type(  # type: ignore [index]
+                    prev_frame_column_type,
+                    df_type,
+                )
 
         schema = {
             field: (
@@ -136,7 +139,10 @@ class ExcelParser(FileTypeParser):
         return FileReadMode.READ_BINARY
 
     @staticmethod
-    def dtype_to_json_type(current_type: Optional[str], dtype: dtype_) -> str:
+    def dtype_to_json_type(
+        current_type: Optional[str],
+        dtype: dtype_,  # type: ignore [type-arg]
+    ) -> str:
         """
         Convert Pandas DataFrame types to Airbyte Types.
 
@@ -187,4 +193,4 @@ class ExcelParser(FileTypeParser):
         Returns:
             pd.DataFrame: Parsed data from the Excel file.
         """
-        return pd.ExcelFile(fp, engine="calamine").parse()
+        return pd.ExcelFile(fp, engine="calamine").parse()  # type: ignore [arg-type, call-overload, no-any-return]
@@ -6,7 +6,7 @@ import json
 import logging
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
 
-from orjson import orjson
+import orjson
 
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
@@ -6,7 +6,7 @@ from abc import abstractmethod
 from functools import cache, cached_property, lru_cache
 from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
 
-from deprecated import deprecated
+from typing_extensions import deprecated
 
 from airbyte_cdk import AirbyteMessage
 from airbyte_cdk.models import SyncMode
@@ -179,7 +179,7 @@ class AbstractFileBasedStream(Stream):
         )
 
     @cached_property
-    @deprecated(version="3.7.0")
+    @deprecated("Deprecated as of CDK version 3.7.0.")
     def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
         return self._availability_strategy
 
@@ -7,7 +7,7 @@ import logging
 from functools import cache, lru_cache
 from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
 
-from deprecated.classic import deprecated
+from typing_extensions import deprecated
 
 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -56,7 +56,10 @@ This module contains adapters to help enabling concurrency on File-based Stream
 """
 
 
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBasedStream):
     @classmethod
     def create_from_stream(
@@ -143,7 +146,7 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
         return self._legacy_stream.supports_incremental
 
     @property
-    @deprecated(version="3.7.0")
+    @deprecated("Deprecated as of CDK version 3.7.0.")
     def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
         return self._legacy_stream.availability_strategy
 
@@ -21,7 +21,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor):
     CURSOR_FIELD = "_ab_source_file_last_modified"
 
     def __init__(self, stream_config: FileBasedStreamConfig, **_: Any):
-        super().__init__(stream_config)
+        super().__init__(stream_config)  # type: ignore [safe-super]
         self._file_to_datetime_history: MutableMapping[str, str] = {}
         self._time_window_if_history_is_full = timedelta(
             days=stream_config.days_to_sync_if_history_is_full
@@ -14,7 +14,7 @@ def format_http_message(
     title: str,
     description: str,
     stream_name: Optional[str],
-    is_auxiliary: bool = None,
+    is_auxiliary: bool | None = None,
 ) -> LogMessage:
     request = response.request
     log_message = {
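The hunk above fixes an annotation where a `None` default was paired with a plain `bool` type; the replacement uses PEP 604 union syntax, equivalent to `Optional[bool]`. A tiny sketch with a hypothetical helper:

def describe_flag(is_auxiliary: bool | None = None) -> str:
    # None means "not specified", so the annotation must allow it.
    return "unset" if is_auxiliary is None else str(is_auxiliary)

print(describe_flag(), describe_flag(True))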
@@ -42,10 +42,10 @@ def format_http_message(
         "url": {"full": request.url},
     }
     if is_auxiliary is not None:
-        log_message["http"]["is_auxiliary"] = is_auxiliary
+        log_message["http"]["is_auxiliary"] = is_auxiliary  # type: ignore [index]
     if stream_name:
         log_message["airbyte_cdk"] = {"stream": {"name": stream_name}}
-    return log_message
+    return log_message  # type: ignore [return-value] # got "dict[str, object]", expected "dict[str, JsonType]"
 
 
 def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]:
@@ -5,7 +5,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Iterable, Mapping, Optional
 
-from deprecated.classic import deprecated
+from typing_extensions import deprecated
 
 from airbyte_cdk.models import AirbyteStream
 from airbyte_cdk.sources.source import ExperimentalClassWarning
@@ -14,7 +14,10 @@ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 
 
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class AbstractStream(ABC):
     """
     AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
@@ -8,7 +8,7 @@ import logging
 from functools import lru_cache
 from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
 
-from deprecated.classic import deprecated
+from typing_extensions import deprecated
 
 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -50,7 +50,10 @@ This module contains adapters to help enabling concurrency on Stream objects wit
 """
 
 
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
     """
     The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream.
@@ -297,7 +300,7 @@ class StreamPartition(Partition):
                     yield Record(
                         data=data_to_return,
                         stream_name=self.stream_name(),
-                        associated_slice=self._slice,
+                        associated_slice=self._slice,  # type: ignore [arg-type]
                     )
                 else:
                     self._message_repository.emit_message(record_data)
@@ -6,7 +6,7 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Optional
 
-from deprecated.classic import deprecated
+from typing_extensions import deprecated
 
 from airbyte_cdk.sources.source import ExperimentalClassWarning
 
@@ -48,7 +48,10 @@ class StreamUnavailable(StreamAvailability):
 STREAM_AVAILABLE = StreamAvailable()
 
 
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class AbstractAvailabilityStrategy(ABC):
     """
     AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK.
@@ -68,7 +71,10 @@ class AbstractAvailabilityStrategy(ABC):
     """
 
 
-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy):
     """
     An availability strategy that always indicates a stream is available.
@@ -240,6 +240,15 @@ class ConcurrentCursor(Cursor):
     def _extract_cursor_value(self, record: Record) -> Any:
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
 
+    def close_partition_without_emit(self, partition: Partition) -> None:
+        slice_count_before = len(self.state.get("slices", []))
+        self._add_slice_to_state(partition)
+        if slice_count_before < len(
+            self.state["slices"]
+        ):  # only emit if at least one slice has been processed
+            self._merge_partitions()
+            self._has_closed_at_least_one_slice = True
+
     def close_partition(self, partition: Partition) -> None:
         slice_count_before = len(self.state.get("slices", []))
         self._add_slice_to_state(partition)
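The new `close_partition_without_emit` folds a finished partition into the cursor's state without emitting a state message, unlike `close_partition`. A hedged sketch of how a caller might use it to emit one consolidated state update at the end instead of one per partition; the helper and its arguments are illustrative, not CDK API:

from typing import Any, Callable, Iterable


def flush_partitions(
    cursor: Any,  # an object exposing close_partition_without_emit() and .state
    partitions: Iterable[Any],
    emit: Callable[[Any], None],
) -> None:
    # Fold each finished partition into the cursor state silently...
    for partition in partitions:
        cursor.close_partition_without_emit(partition)
    # ...then emit a single consolidated state payload at the end.
    emit(cursor.state)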
@@ -473,7 +482,7 @@ class ConcurrentCursor(Cursor):
         :return: True if the record's cursor value falls within the sync boundaries
         """
         try:
-            record_cursor_value: CursorValueType = self._extract_cursor_value(record)  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
+            record_cursor_value: CursorValueType = self._extract_cursor_value(record)
         except ValueError:
             self._log_for_record_without_cursor_value()
             return True
@@ -141,7 +141,7 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
             raise ValueError(
                 f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
-        return dt_object  # type: ignore # we are manually type checking because pendulum.parse may return different types
+        return dt_object
 
 
 class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
@@ -178,7 +178,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
             raise ValueError(
                 f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
-        return dt_object  # type: ignore # we are manually type checking because pendulum.parse may return different types
+        return dt_object
 
 
 class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
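The last two hunks drop a `# type: ignore` on `return dt_object`: the surrounding code raises when `pendulum.parse` does not return a `DateTime`, so the type checker can narrow the return type on its own. A standalone sketch of that narrowing pattern (not the converters' actual method):

import pendulum
from pendulum import DateTime


def parse_timestamp(timestamp: str) -> DateTime:
    dt_object = pendulum.parse(timestamp)
    if not isinstance(dt_object, DateTime):
        # After this check raises, the checker knows dt_object is a DateTime below.
        raise ValueError(f"DateTime object was expected but got {type(dt_object)}")
    return dt_object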