airbyte-cdk 6.7.1.dev0__py3-none-any.whl → 6.7.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +1 -2
  2. airbyte_cdk/config_observation.py +1 -2
  3. airbyte_cdk/connector.py +0 -1
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
  5. airbyte_cdk/connector_builder/main.py +1 -2
  6. airbyte_cdk/destinations/destination.py +1 -2
  7. airbyte_cdk/destinations/vector_db_based/config.py +1 -2
  8. airbyte_cdk/destinations/vector_db_based/document_processor.py +3 -4
  9. airbyte_cdk/destinations/vector_db_based/embedder.py +4 -5
  10. airbyte_cdk/entrypoint.py +2 -3
  11. airbyte_cdk/logger.py +1 -2
  12. airbyte_cdk/models/__init__.py +0 -2
  13. airbyte_cdk/models/airbyte_protocol.py +1 -2
  14. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
  15. airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
  16. airbyte_cdk/sources/config.py +1 -2
  17. airbyte_cdk/sources/declarative/auth/jwt.py +0 -1
  18. airbyte_cdk/sources/declarative/auth/oauth.py +0 -1
  19. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +0 -1
  20. airbyte_cdk/sources/declarative/auth/token.py +1 -2
  21. airbyte_cdk/sources/declarative/auth/token_provider.py +2 -3
  22. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -66
  23. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -167
  24. airbyte_cdk/sources/declarative/decoders/json_decoder.py +2 -3
  25. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +0 -1
  26. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +0 -1
  27. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +0 -1
  28. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -1
  29. airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -1
  30. airbyte_cdk/sources/declarative/extractors/record_filter.py +48 -6
  31. airbyte_cdk/sources/declarative/extractors/record_selector.py +4 -32
  32. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -2
  33. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -2
  34. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +2 -5
  35. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +2 -5
  36. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +3 -1
  37. airbyte_cdk/sources/declarative/interpolation/jinja.py +4 -5
  38. airbyte_cdk/sources/declarative/manifest_declarative_source.py +3 -4
  39. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
  40. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +0 -122
  41. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +2 -11
  42. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -1
  43. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -1
  44. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -1
  45. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -1
  46. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -1
  47. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -1
  48. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -1
  49. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +0 -1
  50. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -1
  51. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +1 -2
  52. airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -1
  53. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -1
  54. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -1
  55. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -1
  56. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
  57. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -1
  58. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -1
  59. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -1
  60. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +3 -9
  61. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -2
  62. airbyte_cdk/sources/declarative/requesters/requester.py +0 -1
  63. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -2
  64. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +7 -12
  65. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -7
  66. airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -1
  67. airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -1
  68. airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
  69. airbyte_cdk/sources/embedded/tools.py +0 -1
  70. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  71. airbyte_cdk/sources/file_based/config/avro_format.py +1 -2
  72. airbyte_cdk/sources/file_based/config/csv_format.py +1 -2
  73. airbyte_cdk/sources/file_based/config/excel_format.py +1 -2
  74. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +1 -2
  75. airbyte_cdk/sources/file_based/config/jsonl_format.py +1 -2
  76. airbyte_cdk/sources/file_based/config/parquet_format.py +1 -2
  77. airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -2
  78. airbyte_cdk/sources/file_based/file_based_source.py +1 -2
  79. airbyte_cdk/sources/file_based/file_based_stream_reader.py +1 -2
  80. airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -1
  81. airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -2
  82. airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
  83. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -2
  84. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +1 -2
  85. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +8 -9
  86. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +1 -2
  87. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +4 -5
  88. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
  89. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
  90. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
  91. airbyte_cdk/sources/http_logger.py +0 -1
  92. airbyte_cdk/sources/streams/call_rate.py +2 -1
  93. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +1 -2
  94. airbyte_cdk/sources/streams/concurrent/adapters.py +4 -8
  95. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +1 -2
  96. airbyte_cdk/sources/streams/concurrent/cursor.py +6 -30
  97. airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -1
  98. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
  99. airbyte_cdk/sources/streams/concurrent/partitions/record.py +35 -0
  100. airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
  101. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
  102. airbyte_cdk/sources/streams/core.py +1 -2
  103. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +1 -2
  104. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +0 -1
  105. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +0 -1
  106. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +1 -2
  107. airbyte_cdk/sources/streams/http/http.py +2 -3
  108. airbyte_cdk/sources/streams/http/http_client.py +8 -49
  109. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -2
  110. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -1
  111. airbyte_cdk/sources/types.py +1 -14
  112. airbyte_cdk/sources/utils/schema_helpers.py +2 -3
  113. airbyte_cdk/sql/secrets.py +1 -2
  114. airbyte_cdk/sql/shared/sql_processor.py +6 -8
  115. airbyte_cdk/test/entrypoint_wrapper.py +3 -4
  116. airbyte_cdk/test/mock_http/mocker.py +0 -1
  117. airbyte_cdk/utils/schema_inferrer.py +1 -2
  118. airbyte_cdk/utils/slice_hasher.py +1 -1
  119. airbyte_cdk/utils/traced_exception.py +1 -2
  120. {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/METADATA +2 -9
  121. {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/RECORD +124 -123
  122. {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/LICENSE.txt +0 -0
  123. {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/WHEEL +0 -0
  124. {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  import requests
9
-
10
9
  from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
11
10
  from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
12
11
 
@@ -7,7 +7,6 @@ from dataclasses import dataclass
7
7
  from typing import Any, Mapping, Optional
8
8
 
9
9
  import requests
10
-
11
10
  from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
12
11
  RequestOptionsProvider,
13
12
  )
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Dict, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
-
10
9
  from airbyte_cdk.sources.declarative.decoders import (
11
10
  Decoder,
12
11
  JsonDecoder,
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
-
10
9
  from airbyte_cdk.sources.declarative.decoders import (
11
10
  Decoder,
12
11
  JsonDecoder,
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
-
10
9
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
11
10
  from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
12
11
  PaginationStrategy,
@@ -7,7 +7,6 @@ from dataclasses import dataclass
7
7
  from typing import Any, Optional
8
8
 
9
9
  import requests
10
-
11
10
  from airbyte_cdk.sources.types import Record
12
11
 
13
12
 
@@ -6,12 +6,10 @@ from abc import ABC, abstractmethod
6
6
  from typing import Any, Optional
7
7
 
8
8
  import requests
9
-
10
9
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
11
10
  from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
12
11
  PaginationStrategy,
13
12
  )
14
- from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor
15
13
  from airbyte_cdk.sources.types import Record
16
14
 
17
15
 
@@ -27,11 +25,7 @@ class PaginationStopCondition(ABC):
27
25
 
28
26
 
29
27
  class CursorStopCondition(PaginationStopCondition):
30
- def __init__(
31
- self,
32
- cursor: DeclarativeCursor
33
- | ConcurrentCursor, # migrate to use both old and concurrent versions
34
- ):
28
+ def __init__(self, cursor: DeclarativeCursor):
35
29
  self._cursor = cursor
36
30
 
37
31
  def is_met(self, record: Record) -> bool:
@@ -52,8 +46,8 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
52
46
  return None
53
47
  return self._delegate.next_page_token(response, last_page_size, last_record)
54
48
 
55
- def reset(self, reset_value: Optional[Any] = None) -> None:
56
- self._delegate.reset(reset_value)
49
+ def reset(self) -> None:
50
+ self._delegate.reset()
57
51
 
58
52
  def get_page_size(self) -> Optional[int]:
59
53
  return self._delegate.get_page_size()
@@ -5,8 +5,6 @@
5
5
  from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Mapping, MutableMapping, Optional, Union
7
7
 
8
- from deprecated import deprecated
9
-
10
8
  from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
11
9
  from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
12
10
  InterpolatedNestedRequestInputProvider,
@@ -19,6 +17,7 @@ from airbyte_cdk.sources.declarative.requesters.request_options.request_options_
19
17
  )
20
18
  from airbyte_cdk.sources.source import ExperimentalClassWarning
21
19
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
20
+ from deprecated import deprecated
22
21
 
23
22
  RequestInput = Union[str, Mapping[str, str]]
24
23
  ValidRequestTypes = (str, list)
@@ -7,7 +7,6 @@ from enum import Enum
7
7
  from typing import Any, Callable, Mapping, MutableMapping, Optional, Union
8
8
 
9
9
  import requests
10
-
11
10
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
12
11
  from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
13
12
  RequestOptionsProvider,
@@ -4,8 +4,6 @@
4
4
  from dataclasses import InitVar, dataclass, field
5
5
  from typing import Any, Callable, Iterable, Mapping, Optional
6
6
 
7
- from deprecated.classic import deprecated
8
-
9
7
  from airbyte_cdk.models import FailureType
10
8
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
11
9
  AsyncJobOrchestrator,
@@ -19,6 +17,7 @@ from airbyte_cdk.sources.source import ExperimentalClassWarning
19
17
  from airbyte_cdk.sources.streams.core import StreamData
20
18
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
21
19
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
20
+ from deprecated.classic import deprecated
22
21
 
23
22
 
24
23
  @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@@ -20,7 +20,6 @@ from typing import (
20
20
  )
21
21
 
22
22
  import requests
23
-
24
23
  from airbyte_cdk.models import AirbyteMessage
25
24
  from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
26
25
  from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor
@@ -361,6 +360,9 @@ class SimpleRetriever(Retriever):
361
360
  next_page_token = self._next_page_token(response)
362
361
  if not next_page_token:
363
362
  pagination_complete = True
363
+ # Closing the response to avoid memory issues. Note that this assumes the caller has completely consumed the response before
364
+ # iterating on another one
365
+ response.close()
364
366
 
365
367
  # Always return an empty generator just in case no records were ever yielded
366
368
  yield from []
@@ -468,9 +470,8 @@ class SimpleRetriever(Retriever):
468
470
  else:
469
471
  return None
470
472
 
471
- def _extract_record(
472
- self, stream_data: StreamData, stream_slice: StreamSlice
473
- ) -> Optional[Record]:
473
+ @staticmethod
474
+ def _extract_record(stream_data: StreamData, stream_slice: StreamSlice) -> Optional[Record]:
474
475
  """
475
476
  As we allow the output of _read_pages to be StreamData, it can be multiple things. Therefore, we need to filter out and normalize
476
477
  the data to streamline the rest of the process.
@@ -479,15 +480,9 @@ class SimpleRetriever(Retriever):
479
480
  # Record is not part of `StreamData` but is the most common implementation of `Mapping[str, Any]` which is part of `StreamData`
480
481
  return stream_data
481
482
  elif isinstance(stream_data, (dict, Mapping)):
482
- return Record(
483
- data=dict(stream_data), associated_slice=stream_slice, stream_name=self.name
484
- )
483
+ return Record(dict(stream_data), stream_slice)
485
484
  elif isinstance(stream_data, AirbyteMessage) and stream_data.record:
486
- return Record(
487
- data=stream_data.record.data, # type:ignore # AirbyteMessage always has record.data
488
- associated_slice=stream_slice,
489
- stream_name=self.name,
490
- )
485
+ return Record(stream_data.record.data, stream_slice)
491
486
  return None
492
487
 
493
488
  # stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever
@@ -1,13 +1,14 @@
1
1
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
2
 
3
- from typing import Any, Callable, Iterable, Mapping, Optional
3
+ from typing import Iterable, Optional, Mapping, Any, Callable
4
4
 
5
5
  from airbyte_cdk.sources.declarative.retrievers import Retriever
6
6
  from airbyte_cdk.sources.message import MessageRepository
7
7
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
8
8
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
9
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
9
10
  from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
10
- from airbyte_cdk.sources.types import Record, StreamSlice
11
+ from airbyte_cdk.sources.types import StreamSlice
11
12
  from airbyte_cdk.utils.slice_hasher import SliceHasher
12
13
 
13
14
 
@@ -58,11 +59,7 @@ class DeclarativePartition(Partition):
58
59
  def read(self) -> Iterable[Record]:
59
60
  for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
60
61
  if isinstance(stream_data, Mapping):
61
- yield Record(
62
- data=stream_data,
63
- stream_name=self.stream_name(),
64
- associated_slice=self._stream_slice,
65
- )
62
+ yield Record(stream_data, self)
66
63
  else:
67
64
  self._message_repository.emit_message(stream_data)
68
65
 
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Dict, List, Mapping, Optional, Type, Union
7
7
 
8
8
  import dpath
9
-
10
9
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
11
10
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
12
11
  from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
@@ -7,7 +7,6 @@ from typing import Any, Dict, List, Mapping, Optional
7
7
 
8
8
  import dpath
9
9
  import dpath.exceptions
10
-
11
10
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
12
11
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
13
12
  from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
@@ -6,7 +6,6 @@ import pkgutil
6
6
  from typing import Any, List, Mapping, Optional
7
7
 
8
8
  import yaml
9
-
10
9
  from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog
11
10
  from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
12
11
  ConcurrentDeclarativeSource,
@@ -5,7 +5,6 @@
5
5
  from typing import Any, Callable, Dict, Iterable, Optional
6
6
 
7
7
  import dpath
8
-
9
8
  from airbyte_cdk.models import AirbyteStream
10
9
 
11
10
 
@@ -7,11 +7,10 @@ from abc import abstractmethod
7
7
  from typing import Any, Dict, List, Literal, Optional, Union
8
8
 
9
9
  import dpath
10
- from pydantic.v1 import AnyUrl, BaseModel, Field
11
-
12
10
  from airbyte_cdk import OneOfOptionConfig
13
11
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
14
12
  from airbyte_cdk.sources.utils import schema_helpers
13
+ from pydantic.v1 import AnyUrl, BaseModel, Field
15
14
 
16
15
 
17
16
  class DeliverRecords(BaseModel):
@@ -3,9 +3,8 @@
3
3
  #
4
4
 
5
5
 
6
- from pydantic.v1 import BaseModel, Field
7
-
8
6
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
+ from pydantic.v1 import BaseModel, Field
9
8
 
10
9
 
11
10
  class AvroFormat(BaseModel):
@@ -6,11 +6,10 @@ import codecs
6
6
  from enum import Enum
7
7
  from typing import Any, Dict, List, Optional, Set, Union
8
8
 
9
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
9
10
  from pydantic.v1 import BaseModel, Field, root_validator, validator
10
11
  from pydantic.v1.error_wrappers import ValidationError
11
12
 
12
- from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
13
-
14
13
 
15
14
  class InferenceType(Enum):
16
15
  NONE = "None"
@@ -2,9 +2,8 @@
2
2
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from pydantic.v1 import BaseModel, Field
6
-
7
5
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
6
+ from pydantic.v1 import BaseModel, Field
8
7
 
9
8
 
10
9
  class ExcelFormat(BaseModel):
@@ -5,8 +5,6 @@
5
5
  from enum import Enum
6
6
  from typing import Any, List, Mapping, Optional, Union
7
7
 
8
- from pydantic.v1 import BaseModel, Field, validator
9
-
10
8
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
11
9
  from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
12
10
  from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
@@ -15,6 +13,7 @@ from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
15
13
  from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
16
14
  from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
17
15
  from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
16
+ from pydantic.v1 import BaseModel, Field, validator
18
17
 
19
18
  PrimaryKeyType = Optional[Union[str, List[str]]]
20
19
 
@@ -2,9 +2,8 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from pydantic.v1 import BaseModel, Field
6
-
7
5
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
6
+ from pydantic.v1 import BaseModel, Field
8
7
 
9
8
 
10
9
  class JsonlFormat(BaseModel):
@@ -3,9 +3,8 @@
3
3
  #
4
4
 
5
5
 
6
- from pydantic.v1 import BaseModel, Field
7
-
8
6
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
+ from pydantic.v1 import BaseModel, Field
9
8
 
10
9
 
11
10
  class ParquetFormat(BaseModel):
@@ -4,9 +4,8 @@
4
4
 
5
5
  from typing import List, Literal, Optional, Union
6
6
 
7
- from pydantic.v1 import BaseModel, Field
8
-
9
7
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
+ from pydantic.v1 import BaseModel, Field
10
9
 
11
10
 
12
11
  class LocalProcessingConfigModel(BaseModel):
@@ -8,8 +8,6 @@ from abc import ABC
8
8
  from collections import Counter
9
9
  from typing import Any, Iterator, List, Mapping, Optional, Tuple, Type, Union
10
10
 
11
- from pydantic.v1.error_wrappers import ValidationError
12
-
13
11
  from airbyte_cdk.logger import AirbyteLogFormatter, init_logger
14
12
  from airbyte_cdk.models import (
15
13
  AirbyteMessage,
@@ -62,6 +60,7 @@ from airbyte_cdk.sources.streams import Stream
62
60
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
63
61
  from airbyte_cdk.utils.analytics_message import create_analytics_message
64
62
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
63
+ from pydantic.v1.error_wrappers import ValidationError
65
64
 
66
65
  DEFAULT_CONCURRENCY = 100
67
66
  MAX_CONCURRENCY = 100
@@ -10,10 +10,9 @@ from io import IOBase
10
10
  from os import makedirs, path
11
11
  from typing import Any, Dict, Iterable, List, Optional, Set
12
12
 
13
- from wcmatch.glob import GLOBSTAR, globmatch
14
-
15
13
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
16
14
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
15
+ from wcmatch.glob import GLOBSTAR, globmatch
17
16
 
18
17
 
19
18
  class FileReadMode(Enum):
@@ -6,7 +6,6 @@ import logging
6
6
  from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
7
7
 
8
8
  import fastavro
9
-
10
9
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
11
10
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
12
11
  from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
@@ -12,8 +12,6 @@ from io import IOBase
12
12
  from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
13
13
  from uuid import uuid4
14
14
 
15
- from orjson import orjson
16
-
17
15
  from airbyte_cdk.models import FailureType
18
16
  from airbyte_cdk.sources.file_based.config.csv_format import (
19
17
  CsvFormat,
@@ -31,6 +29,7 @@ from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeP
31
29
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
32
30
  from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType
33
31
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
32
+ from orjson import orjson
34
33
 
35
34
  DIALECT_NAME = "_config_dialect"
36
35
 
@@ -8,11 +8,6 @@ from pathlib import Path
8
8
  from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
9
9
 
10
10
  import pandas as pd
11
- from numpy import datetime64, issubdtype
12
- from numpy import dtype as dtype_
13
- from orjson import orjson
14
- from pydantic.v1 import BaseModel
15
-
16
11
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
17
12
  ExcelFormat,
18
13
  FileBasedStreamConfig,
@@ -29,6 +24,11 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
29
24
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
30
25
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
31
26
  from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
27
+ from numpy import datetime64
28
+ from numpy import dtype as dtype_
29
+ from numpy import issubdtype
30
+ from orjson import orjson
31
+ from pydantic.v1 import BaseModel
32
32
 
33
33
 
34
34
  class ExcelParser(FileTypeParser):
@@ -6,8 +6,6 @@ import json
6
6
  import logging
7
7
  from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
8
8
 
9
- from orjson import orjson
10
-
11
9
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
12
10
  from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
13
11
  from airbyte_cdk.sources.file_based.file_based_stream_reader import (
@@ -21,6 +19,7 @@ from airbyte_cdk.sources.file_based.schema_helpers import (
21
19
  SchemaType,
22
20
  merge_schemas,
23
21
  )
22
+ from orjson import orjson
24
23
 
25
24
 
26
25
  class JsonlParser(FileTypeParser):
@@ -10,8 +10,6 @@ from urllib.parse import unquote
10
10
 
11
11
  import pyarrow as pa
12
12
  import pyarrow.parquet as pq
13
- from pyarrow import DictionaryArray, Scalar
14
-
15
13
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
16
14
  FileBasedStreamConfig,
17
15
  ParquetFormat,
@@ -28,6 +26,7 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
28
26
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
29
27
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
30
28
  from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
29
+ from pyarrow import DictionaryArray, Scalar
31
30
 
32
31
 
33
32
  class ParquetParser(FileTypeParser):
@@ -9,16 +9,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union
9
9
 
10
10
  import backoff
11
11
  import dpath
12
- import nltk
13
12
  import requests
14
- from unstructured.file_utils.filetype import (
15
- EXT_TO_FILETYPE,
16
- FILETYPE_TO_MIMETYPE,
17
- STR_TO_FILETYPE,
18
- FileType,
19
- detect_filetype,
20
- )
21
-
22
13
  from airbyte_cdk.models import FailureType
23
14
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
24
15
  from airbyte_cdk.sources.file_based.config.unstructured_format import (
@@ -37,6 +28,14 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
37
28
  from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
38
29
  from airbyte_cdk.utils import is_cloud_environment
39
30
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
31
+ from unstructured.file_utils.filetype import (
32
+ EXT_TO_FILETYPE,
33
+ FILETYPE_TO_MIMETYPE,
34
+ STR_TO_FILETYPE,
35
+ FileType,
36
+ detect_filetype,
37
+ )
38
+ import nltk
40
39
 
41
40
  unstructured_partition_pdf = None
42
41
  unstructured_partition_docx = None
@@ -6,8 +6,6 @@ from abc import abstractmethod
6
6
  from functools import cache, cached_property, lru_cache
7
7
  from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
8
8
 
9
- from deprecated import deprecated
10
-
11
9
  from airbyte_cdk import AirbyteMessage
12
10
  from airbyte_cdk.models import SyncMode
13
11
  from airbyte_cdk.sources.file_based.availability_strategy import (
@@ -32,6 +30,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
32
30
  from airbyte_cdk.sources.file_based.types import StreamSlice
33
31
  from airbyte_cdk.sources.streams import Stream
34
32
  from airbyte_cdk.sources.streams.checkpoint import Cursor
33
+ from deprecated import deprecated
35
34
 
36
35
 
37
36
  class AbstractFileBasedStream(Stream):
@@ -7,8 +7,6 @@ import logging
7
7
  from functools import cache, lru_cache
8
8
  from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
9
9
 
10
- from deprecated.classic import deprecated
11
-
12
10
  from airbyte_cdk.models import (
13
11
  AirbyteLogMessage,
14
12
  AirbyteMessage,
@@ -41,10 +39,11 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
41
39
  )
42
40
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
43
41
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
42
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
44
43
  from airbyte_cdk.sources.streams.core import StreamData
45
- from airbyte_cdk.sources.types import Record
46
44
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
47
45
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
46
+ from deprecated.classic import deprecated
48
47
 
49
48
  if TYPE_CHECKING:
50
49
  from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
@@ -248,7 +247,7 @@ class FileBasedStreamPartition(Partition):
248
247
  self._stream.transformer.transform(
249
248
  data_to_return, self._stream.get_json_schema()
250
249
  )
251
- yield Record(data=data_to_return, stream_name=self.stream_name())
250
+ yield Record(data_to_return, self)
252
251
  elif (
253
252
  isinstance(record_data, AirbyteMessage)
254
253
  and record_data.type == Type.RECORD
@@ -266,7 +265,7 @@ class FileBasedStreamPartition(Partition):
266
265
  else:
267
266
  yield Record(
268
267
  data=record_message_data,
269
- stream_name=self.stream_name(),
268
+ partition=self,
270
269
  is_file_transfer_message=self._use_file_transfer(),
271
270
  )
272
271
  else:
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
12
12
  from airbyte_cdk.sources.file_based.types import StreamState
13
13
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
14
14
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
15
- from airbyte_cdk.sources.types import Record
15
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
16
16
 
17
17
  if TYPE_CHECKING:
18
18
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
@@ -19,7 +19,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
19
19
  from airbyte_cdk.sources.message.repository import MessageRepository
20
20
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
21
21
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
22
- from airbyte_cdk.sources.types import Record
22
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
23
23
 
24
24
  if TYPE_CHECKING:
25
25
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
@@ -16,7 +16,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
16
16
  from airbyte_cdk.sources.message import MessageRepository
17
17
  from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
18
18
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
19
- from airbyte_cdk.sources.types import Record
19
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
20
20
 
21
21
  if TYPE_CHECKING:
22
22
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
@@ -5,7 +5,6 @@
5
5
  from typing import Optional, Union
6
6
 
7
7
  import requests
8
-
9
8
  from airbyte_cdk.sources.message import LogMessage
10
9
 
11
10
 
@@ -14,8 +14,9 @@ from urllib import parse
14
14
 
15
15
  import requests
16
16
  import requests_cache
17
- from pyrate_limiter import InMemoryBucket, Limiter, RateItem, TimeClock
17
+ from pyrate_limiter import InMemoryBucket, Limiter
18
18
  from pyrate_limiter import Rate as PyRateRate
19
+ from pyrate_limiter import RateItem, TimeClock
19
20
  from pyrate_limiter.exceptions import BucketFullException
20
21
 
21
22
  # prevents mypy from complaining about missing session attributes in LimiterMixin
@@ -5,13 +5,12 @@
5
5
  from abc import ABC, abstractmethod
6
6
  from typing import Any, Iterable, Mapping, Optional
7
7
 
8
- from deprecated.classic import deprecated
9
-
10
8
  from airbyte_cdk.models import AirbyteStream
11
9
  from airbyte_cdk.sources.source import ExperimentalClassWarning
12
10
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
13
11
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
14
12
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
13
+ from deprecated.classic import deprecated
15
14
 
16
15
 
17
16
  @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)