airbyte-cdk 6.7.1rc4__py3-none-any.whl → 6.7.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +2 -1
  2. airbyte_cdk/config_observation.py +2 -1
  3. airbyte_cdk/connector.py +1 -0
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
  5. airbyte_cdk/connector_builder/main.py +2 -1
  6. airbyte_cdk/destinations/destination.py +2 -1
  7. airbyte_cdk/destinations/vector_db_based/config.py +2 -1
  8. airbyte_cdk/destinations/vector_db_based/document_processor.py +4 -3
  9. airbyte_cdk/destinations/vector_db_based/embedder.py +5 -4
  10. airbyte_cdk/entrypoint.py +3 -2
  11. airbyte_cdk/logger.py +2 -1
  12. airbyte_cdk/models/__init__.py +2 -0
  13. airbyte_cdk/models/airbyte_protocol.py +2 -1
  14. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
  15. airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
  16. airbyte_cdk/sources/config.py +2 -1
  17. airbyte_cdk/sources/declarative/auth/jwt.py +1 -0
  18. airbyte_cdk/sources/declarative/auth/oauth.py +1 -0
  19. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -0
  20. airbyte_cdk/sources/declarative/auth/token.py +2 -1
  21. airbyte_cdk/sources/declarative/auth/token_provider.py +3 -2
  22. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +66 -8
  23. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +196 -0
  24. airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -2
  25. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -0
  26. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +1 -0
  27. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +1 -0
  28. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +1 -0
  29. airbyte_cdk/sources/declarative/extractors/http_selector.py +1 -0
  30. airbyte_cdk/sources/declarative/extractors/record_filter.py +6 -48
  31. airbyte_cdk/sources/declarative/extractors/record_selector.py +32 -4
  32. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +7 -2
  33. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +2 -1
  34. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +5 -2
  35. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +5 -2
  36. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +1 -3
  37. airbyte_cdk/sources/declarative/interpolation/jinja.py +5 -4
  38. airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -3
  39. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
  40. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +144 -0
  41. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +45 -4
  42. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -0
  43. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +1 -0
  44. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +1 -0
  45. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +1 -0
  46. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +1 -0
  47. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +1 -0
  48. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -0
  49. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +1 -0
  50. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +1 -0
  51. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -2
  52. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -0
  53. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -0
  54. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +1 -0
  55. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +1 -0
  56. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +1 -0
  57. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +1 -0
  58. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +1 -0
  59. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +1 -0
  60. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +9 -3
  61. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -1
  62. airbyte_cdk/sources/declarative/requesters/requester.py +1 -0
  63. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -1
  64. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +12 -7
  65. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -4
  66. airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -0
  67. airbyte_cdk/sources/declarative/transformations/remove_fields.py +1 -0
  68. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -0
  69. airbyte_cdk/sources/embedded/tools.py +1 -0
  70. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  71. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  72. airbyte_cdk/sources/file_based/config/csv_format.py +2 -1
  73. airbyte_cdk/sources/file_based/config/excel_format.py +2 -1
  74. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -1
  75. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  76. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  77. airbyte_cdk/sources/file_based/config/unstructured_format.py +2 -1
  78. airbyte_cdk/sources/file_based/file_based_source.py +2 -1
  79. airbyte_cdk/sources/file_based/file_based_stream_reader.py +2 -1
  80. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -0
  81. airbyte_cdk/sources/file_based/file_types/csv_parser.py +2 -1
  82. airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
  83. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +2 -1
  84. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -1
  85. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +9 -8
  86. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -1
  87. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +5 -4
  88. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
  89. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
  90. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
  91. airbyte_cdk/sources/http_logger.py +1 -0
  92. airbyte_cdk/sources/streams/call_rate.py +1 -2
  93. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -1
  94. airbyte_cdk/sources/streams/concurrent/adapters.py +8 -4
  95. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +2 -1
  96. airbyte_cdk/sources/streams/concurrent/cursor.py +52 -9
  97. airbyte_cdk/sources/streams/concurrent/default_stream.py +1 -0
  98. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
  99. airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
  100. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
  101. airbyte_cdk/sources/streams/core.py +2 -1
  102. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +2 -1
  103. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +1 -0
  104. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +1 -0
  105. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +2 -1
  106. airbyte_cdk/sources/streams/http/http.py +3 -2
  107. airbyte_cdk/sources/streams/http/http_client.py +58 -11
  108. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +2 -1
  109. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +1 -0
  110. airbyte_cdk/sources/types.py +14 -1
  111. airbyte_cdk/sources/utils/schema_helpers.py +3 -2
  112. airbyte_cdk/sql/secrets.py +2 -1
  113. airbyte_cdk/sql/shared/sql_processor.py +8 -6
  114. airbyte_cdk/test/entrypoint_wrapper.py +4 -3
  115. airbyte_cdk/test/mock_http/mocker.py +1 -0
  116. airbyte_cdk/utils/schema_inferrer.py +2 -1
  117. airbyte_cdk/utils/slice_hasher.py +1 -1
  118. airbyte_cdk/utils/traced_exception.py +2 -1
  119. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/METADATA +9 -2
  120. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/RECORD +123 -124
  121. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -35
  122. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/LICENSE.txt +0 -0
  123. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/WHEEL +0 -0
  124. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,13 @@
1
1
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
2
 
3
- from typing import Iterable, Optional, Mapping, Any, Callable
3
+ from typing import Any, Callable, Iterable, Mapping, Optional
4
4
 
5
5
  from airbyte_cdk.sources.declarative.retrievers import Retriever
6
6
  from airbyte_cdk.sources.message import MessageRepository
7
7
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
8
8
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
9
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
10
9
  from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
11
- from airbyte_cdk.sources.types import StreamSlice
10
+ from airbyte_cdk.sources.types import Record, StreamSlice
12
11
  from airbyte_cdk.utils.slice_hasher import SliceHasher
13
12
 
14
13
 
@@ -59,7 +58,11 @@ class DeclarativePartition(Partition):
59
58
  def read(self) -> Iterable[Record]:
60
59
  for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
61
60
  if isinstance(stream_data, Mapping):
62
- yield Record(stream_data, self)
61
+ yield Record(
62
+ data=stream_data,
63
+ stream_name=self.stream_name(),
64
+ associated_slice=self._stream_slice,
65
+ )
63
66
  else:
64
67
  self._message_repository.emit_message(stream_data)
65
68
 
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Dict, List, Mapping, Optional, Type, Union
7
7
 
8
8
  import dpath
9
+
9
10
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
10
11
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
11
12
  from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
@@ -7,6 +7,7 @@ from typing import Any, Dict, List, Mapping, Optional
7
7
 
8
8
  import dpath
9
9
  import dpath.exceptions
10
+
10
11
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
11
12
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
12
13
  from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
@@ -6,6 +6,7 @@ import pkgutil
6
6
  from typing import Any, List, Mapping, Optional
7
7
 
8
8
  import yaml
9
+
9
10
  from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog
10
11
  from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
11
12
  ConcurrentDeclarativeSource,
@@ -5,6 +5,7 @@
5
5
  from typing import Any, Callable, Dict, Iterable, Optional
6
6
 
7
7
  import dpath
8
+
8
9
  from airbyte_cdk.models import AirbyteStream
9
10
 
10
11
 
@@ -7,10 +7,11 @@ from abc import abstractmethod
7
7
  from typing import Any, Dict, List, Literal, Optional, Union
8
8
 
9
9
  import dpath
10
+ from pydantic.v1 import AnyUrl, BaseModel, Field
11
+
10
12
  from airbyte_cdk import OneOfOptionConfig
11
13
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
12
14
  from airbyte_cdk.sources.utils import schema_helpers
13
- from pydantic.v1 import AnyUrl, BaseModel, Field
14
15
 
15
16
 
16
17
  class DeliverRecords(BaseModel):
@@ -3,9 +3,10 @@
3
3
  #
4
4
 
5
5
 
6
- from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
6
  from pydantic.v1 import BaseModel, Field
8
7
 
8
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
9
+
9
10
 
10
11
  class AvroFormat(BaseModel):
11
12
  class Config(OneOfOptionConfig):
@@ -6,10 +6,11 @@ import codecs
6
6
  from enum import Enum
7
7
  from typing import Any, Dict, List, Optional, Set, Union
8
8
 
9
- from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
10
9
  from pydantic.v1 import BaseModel, Field, root_validator, validator
11
10
  from pydantic.v1.error_wrappers import ValidationError
12
11
 
12
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
13
+
13
14
 
14
15
  class InferenceType(Enum):
15
16
  NONE = "None"
@@ -2,9 +2,10 @@
2
2
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
6
5
  from pydantic.v1 import BaseModel, Field
7
6
 
7
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
+
8
9
 
9
10
  class ExcelFormat(BaseModel):
10
11
  class Config(OneOfOptionConfig):
@@ -5,6 +5,8 @@
5
5
  from enum import Enum
6
6
  from typing import Any, List, Mapping, Optional, Union
7
7
 
8
+ from pydantic.v1 import BaseModel, Field, validator
9
+
8
10
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
9
11
  from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
10
12
  from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
@@ -13,7 +15,6 @@ from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
13
15
  from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
14
16
  from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
15
17
  from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
16
- from pydantic.v1 import BaseModel, Field, validator
17
18
 
18
19
  PrimaryKeyType = Optional[Union[str, List[str]]]
19
20
 
@@ -2,9 +2,10 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
6
5
  from pydantic.v1 import BaseModel, Field
7
6
 
7
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
+
8
9
 
9
10
  class JsonlFormat(BaseModel):
10
11
  class Config(OneOfOptionConfig):
@@ -3,9 +3,10 @@
3
3
  #
4
4
 
5
5
 
6
- from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
7
6
  from pydantic.v1 import BaseModel, Field
8
7
 
8
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
9
+
9
10
 
10
11
  class ParquetFormat(BaseModel):
11
12
  class Config(OneOfOptionConfig):
@@ -4,9 +4,10 @@
4
4
 
5
5
  from typing import List, Literal, Optional, Union
6
6
 
7
- from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
8
7
  from pydantic.v1 import BaseModel, Field
9
8
 
9
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
10
+
10
11
 
11
12
  class LocalProcessingConfigModel(BaseModel):
12
13
  mode: Literal["local"] = Field("local", const=True)
@@ -8,6 +8,8 @@ from abc import ABC
8
8
  from collections import Counter
9
9
  from typing import Any, Iterator, List, Mapping, Optional, Tuple, Type, Union
10
10
 
11
+ from pydantic.v1.error_wrappers import ValidationError
12
+
11
13
  from airbyte_cdk.logger import AirbyteLogFormatter, init_logger
12
14
  from airbyte_cdk.models import (
13
15
  AirbyteMessage,
@@ -60,7 +62,6 @@ from airbyte_cdk.sources.streams import Stream
60
62
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
61
63
  from airbyte_cdk.utils.analytics_message import create_analytics_message
62
64
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
63
- from pydantic.v1.error_wrappers import ValidationError
64
65
 
65
66
  DEFAULT_CONCURRENCY = 100
66
67
  MAX_CONCURRENCY = 100
@@ -10,9 +10,10 @@ from io import IOBase
10
10
  from os import makedirs, path
11
11
  from typing import Any, Dict, Iterable, List, Optional, Set
12
12
 
13
+ from wcmatch.glob import GLOBSTAR, globmatch
14
+
13
15
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
14
16
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
15
- from wcmatch.glob import GLOBSTAR, globmatch
16
17
 
17
18
 
18
19
  class FileReadMode(Enum):
@@ -6,6 +6,7 @@ import logging
6
6
  from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
7
7
 
8
8
  import fastavro
9
+
9
10
  from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
10
11
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
11
12
  from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
@@ -12,6 +12,8 @@ from io import IOBase
12
12
  from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
13
13
  from uuid import uuid4
14
14
 
15
+ from orjson import orjson
16
+
15
17
  from airbyte_cdk.models import FailureType
16
18
  from airbyte_cdk.sources.file_based.config.csv_format import (
17
19
  CsvFormat,
@@ -29,7 +31,6 @@ from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeP
29
31
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
30
32
  from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType
31
33
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
32
- from orjson import orjson
33
34
 
34
35
  DIALECT_NAME = "_config_dialect"
35
36
 
@@ -8,6 +8,11 @@ from pathlib import Path
8
8
  from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
9
9
 
10
10
  import pandas as pd
11
+ from numpy import datetime64, issubdtype
12
+ from numpy import dtype as dtype_
13
+ from orjson import orjson
14
+ from pydantic.v1 import BaseModel
15
+
11
16
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
12
17
  ExcelFormat,
13
18
  FileBasedStreamConfig,
@@ -24,11 +29,6 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
24
29
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
25
30
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
26
31
  from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
27
- from numpy import datetime64
28
- from numpy import dtype as dtype_
29
- from numpy import issubdtype
30
- from orjson import orjson
31
- from pydantic.v1 import BaseModel
32
32
 
33
33
 
34
34
  class ExcelParser(FileTypeParser):
@@ -6,6 +6,8 @@ import json
6
6
  import logging
7
7
  from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
8
8
 
9
+ from orjson import orjson
10
+
9
11
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
10
12
  from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
11
13
  from airbyte_cdk.sources.file_based.file_based_stream_reader import (
@@ -19,7 +21,6 @@ from airbyte_cdk.sources.file_based.schema_helpers import (
19
21
  SchemaType,
20
22
  merge_schemas,
21
23
  )
22
- from orjson import orjson
23
24
 
24
25
 
25
26
  class JsonlParser(FileTypeParser):
@@ -10,6 +10,8 @@ from urllib.parse import unquote
10
10
 
11
11
  import pyarrow as pa
12
12
  import pyarrow.parquet as pq
13
+ from pyarrow import DictionaryArray, Scalar
14
+
13
15
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
14
16
  FileBasedStreamConfig,
15
17
  ParquetFormat,
@@ -26,7 +28,6 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
26
28
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
27
29
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
28
30
  from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
29
- from pyarrow import DictionaryArray, Scalar
30
31
 
31
32
 
32
33
  class ParquetParser(FileTypeParser):
@@ -9,7 +9,16 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union
9
9
 
10
10
  import backoff
11
11
  import dpath
12
+ import nltk
12
13
  import requests
14
+ from unstructured.file_utils.filetype import (
15
+ EXT_TO_FILETYPE,
16
+ FILETYPE_TO_MIMETYPE,
17
+ STR_TO_FILETYPE,
18
+ FileType,
19
+ detect_filetype,
20
+ )
21
+
13
22
  from airbyte_cdk.models import FailureType
14
23
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
15
24
  from airbyte_cdk.sources.file_based.config.unstructured_format import (
@@ -28,14 +37,6 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
28
37
  from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
29
38
  from airbyte_cdk.utils import is_cloud_environment
30
39
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
31
- from unstructured.file_utils.filetype import (
32
- EXT_TO_FILETYPE,
33
- FILETYPE_TO_MIMETYPE,
34
- STR_TO_FILETYPE,
35
- FileType,
36
- detect_filetype,
37
- )
38
- import nltk
39
40
 
40
41
  unstructured_partition_pdf = None
41
42
  unstructured_partition_docx = None
@@ -6,6 +6,8 @@ from abc import abstractmethod
6
6
  from functools import cache, cached_property, lru_cache
7
7
  from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
8
8
 
9
+ from deprecated import deprecated
10
+
9
11
  from airbyte_cdk import AirbyteMessage
10
12
  from airbyte_cdk.models import SyncMode
11
13
  from airbyte_cdk.sources.file_based.availability_strategy import (
@@ -30,7 +32,6 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
30
32
  from airbyte_cdk.sources.file_based.types import StreamSlice
31
33
  from airbyte_cdk.sources.streams import Stream
32
34
  from airbyte_cdk.sources.streams.checkpoint import Cursor
33
- from deprecated import deprecated
34
35
 
35
36
 
36
37
  class AbstractFileBasedStream(Stream):
@@ -7,6 +7,8 @@ import logging
7
7
  from functools import cache, lru_cache
8
8
  from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
9
9
 
10
+ from deprecated.classic import deprecated
11
+
10
12
  from airbyte_cdk.models import (
11
13
  AirbyteLogMessage,
12
14
  AirbyteMessage,
@@ -39,11 +41,10 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
39
41
  )
40
42
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
41
43
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
42
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
43
44
  from airbyte_cdk.sources.streams.core import StreamData
45
+ from airbyte_cdk.sources.types import Record
44
46
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
45
47
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
46
- from deprecated.classic import deprecated
47
48
 
48
49
  if TYPE_CHECKING:
49
50
  from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
@@ -247,7 +248,7 @@ class FileBasedStreamPartition(Partition):
247
248
  self._stream.transformer.transform(
248
249
  data_to_return, self._stream.get_json_schema()
249
250
  )
250
- yield Record(data_to_return, self)
251
+ yield Record(data=data_to_return, stream_name=self.stream_name())
251
252
  elif (
252
253
  isinstance(record_data, AirbyteMessage)
253
254
  and record_data.type == Type.RECORD
@@ -265,7 +266,7 @@ class FileBasedStreamPartition(Partition):
265
266
  else:
266
267
  yield Record(
267
268
  data=record_message_data,
268
- partition=self,
269
+ stream_name=self.stream_name(),
269
270
  is_file_transfer_message=self._use_file_transfer(),
270
271
  )
271
272
  else:
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
12
12
  from airbyte_cdk.sources.file_based.types import StreamState
13
13
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
14
14
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
15
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
15
+ from airbyte_cdk.sources.types import Record
16
16
 
17
17
  if TYPE_CHECKING:
18
18
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
@@ -19,7 +19,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
19
19
  from airbyte_cdk.sources.message.repository import MessageRepository
20
20
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
21
21
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
22
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
22
+ from airbyte_cdk.sources.types import Record
23
23
 
24
24
  if TYPE_CHECKING:
25
25
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
@@ -16,7 +16,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
16
16
  from airbyte_cdk.sources.message import MessageRepository
17
17
  from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
18
18
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
19
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
19
+ from airbyte_cdk.sources.types import Record
20
20
 
21
21
  if TYPE_CHECKING:
22
22
  from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
@@ -5,6 +5,7 @@
5
5
  from typing import Optional, Union
6
6
 
7
7
  import requests
8
+
8
9
  from airbyte_cdk.sources.message import LogMessage
9
10
 
10
11
 
@@ -14,9 +14,8 @@ from urllib import parse
14
14
 
15
15
  import requests
16
16
  import requests_cache
17
- from pyrate_limiter import InMemoryBucket, Limiter
17
+ from pyrate_limiter import InMemoryBucket, Limiter, RateItem, TimeClock
18
18
  from pyrate_limiter import Rate as PyRateRate
19
- from pyrate_limiter import RateItem, TimeClock
20
19
  from pyrate_limiter.exceptions import BucketFullException
21
20
 
22
21
  # prevents mypy from complaining about missing session attributes in LimiterMixin
@@ -5,12 +5,13 @@
5
5
  from abc import ABC, abstractmethod
6
6
  from typing import Any, Iterable, Mapping, Optional
7
7
 
8
+ from deprecated.classic import deprecated
9
+
8
10
  from airbyte_cdk.models import AirbyteStream
9
11
  from airbyte_cdk.sources.source import ExperimentalClassWarning
10
12
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
11
13
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
12
14
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
13
- from deprecated.classic import deprecated
14
15
 
15
16
 
16
17
  @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@@ -8,6 +8,8 @@ import logging
8
8
  from functools import lru_cache
9
9
  from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
10
10
 
11
+ from deprecated.classic import deprecated
12
+
11
13
  from airbyte_cdk.models import (
12
14
  AirbyteLogMessage,
13
15
  AirbyteMessage,
@@ -37,12 +39,10 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
37
39
  )
38
40
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
39
41
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
40
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
41
42
  from airbyte_cdk.sources.streams.core import StreamData
43
+ from airbyte_cdk.sources.types import Record
42
44
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
43
45
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
44
- from deprecated.classic import deprecated
45
-
46
46
  from airbyte_cdk.utils.slice_hasher import SliceHasher
47
47
 
48
48
  """
@@ -294,7 +294,11 @@ class StreamPartition(Partition):
294
294
  self._stream.transformer.transform(
295
295
  data_to_return, self._stream.get_json_schema()
296
296
  )
297
- yield Record(data_to_return, self)
297
+ yield Record(
298
+ data=data_to_return,
299
+ stream_name=self.stream_name(),
300
+ associated_slice=self._slice,
301
+ )
298
302
  else:
299
303
  self._message_repository.emit_message(record_data)
300
304
  except Exception as e:
@@ -6,9 +6,10 @@ import logging
6
6
  from abc import ABC, abstractmethod
7
7
  from typing import Optional
8
8
 
9
- from airbyte_cdk.sources.source import ExperimentalClassWarning
10
9
  from deprecated.classic import deprecated
11
10
 
11
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
12
+
12
13
 
13
14
  class StreamAvailability(ABC):
14
15
  @abstractmethod
@@ -3,19 +3,32 @@
3
3
  #
4
4
 
5
5
  import functools
6
+ import logging
6
7
  from abc import ABC, abstractmethod
7
- from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Protocol, Tuple
8
+ from typing import (
9
+ Any,
10
+ Callable,
11
+ Iterable,
12
+ List,
13
+ Mapping,
14
+ MutableMapping,
15
+ Optional,
16
+ Protocol,
17
+ Tuple,
18
+ Union,
19
+ )
8
20
 
9
21
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
10
22
  from airbyte_cdk.sources.message import MessageRepository
11
23
  from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
12
24
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
13
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
14
25
  from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
15
26
  from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
16
27
  AbstractStreamStateConverter,
17
28
  )
18
- from airbyte_cdk.sources.types import StreamSlice
29
+ from airbyte_cdk.sources.types import Record, StreamSlice
30
+
31
+ LOGGER = logging.getLogger("airbyte")
19
32
 
20
33
 
21
34
  def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
@@ -173,9 +186,13 @@ class ConcurrentCursor(Cursor):
173
186
  self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
174
187
  self._lookback_window = lookback_window
175
188
  self._slice_range = slice_range
176
- self._most_recent_cursor_value_per_partition: MutableMapping[Partition, Any] = {}
189
+ self._most_recent_cursor_value_per_partition: MutableMapping[
190
+ Union[StreamSlice, Mapping[str, Any], None], Any
191
+ ] = {}
177
192
  self._has_closed_at_least_one_slice = False
178
193
  self._cursor_granularity = cursor_granularity
194
+ # Flag to track if the logger has been triggered (per stream)
195
+ self._should_be_synced_logger_triggered = False
179
196
 
180
197
  @property
181
198
  def state(self) -> MutableMapping[str, Any]:
@@ -210,12 +227,15 @@ class ConcurrentCursor(Cursor):
210
227
 
211
228
  def observe(self, record: Record) -> None:
212
229
  most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
213
- record.partition
230
+ record.associated_slice
214
231
  )
215
- cursor_value = self._extract_cursor_value(record)
232
+ try:
233
+ cursor_value = self._extract_cursor_value(record)
216
234
 
217
- if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
218
- self._most_recent_cursor_value_per_partition[record.partition] = cursor_value
235
+ if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
236
+ self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value
237
+ except ValueError:
238
+ self._log_for_record_without_cursor_value()
219
239
 
220
240
  def _extract_cursor_value(self, record: Record) -> Any:
221
241
  return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
@@ -231,7 +251,9 @@ class ConcurrentCursor(Cursor):
231
251
  self._has_closed_at_least_one_slice = True
232
252
 
233
253
  def _add_slice_to_state(self, partition: Partition) -> None:
234
- most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(partition)
254
+ most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
255
+ partition.to_slice()
256
+ )
235
257
 
236
258
  if self._slice_boundary_fields:
237
259
  if "slices" not in self.state:
@@ -442,3 +464,24 @@ class ConcurrentCursor(Cursor):
442
464
  return lower + step
443
465
  except OverflowError:
444
466
  return self._end_provider()
467
+
468
def should_be_synced(self, record: Record) -> bool:
    """
    Decide whether a record belongs in the current sync based on its cursor value.

    A record whose cursor value cannot be extracted (``ValueError``) is always
    considered syncable; a warning is emitted through
    ``_log_for_record_without_cursor_value`` in that case.

    :param record: The record to evaluate

    :return: True if the cursor value lies between ``self.start`` and the value
        returned by ``self._end_provider()`` (both bounds inclusive), or if the
        cursor value could not be extracted
    """
    try:
        # NOTE(review): the type suppression is carried over from the original line; confirm it is still required.
        cursor_value: CursorValueType = self._extract_cursor_value(record)  # type: ignore
    except ValueError:
        self._log_for_record_without_cursor_value()
        return True
    else:
        # Chained comparison: the end provider is only called when the lower bound holds.
        return self.start <= cursor_value <= self._end_provider()
481
+
482
def _log_for_record_without_cursor_value(self) -> None:
    """
    Emit a one-time warning that a record is missing the cursor field.

    The warning is logged only while ``_should_be_synced_logger_triggered`` is
    False; the flag is then set so later calls on this cursor stay silent.
    """
    if self._should_be_synced_logger_triggered:
        # Already warned once for this cursor; nothing more to do.
        return

    LOGGER.warning(
        f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record for stream {self._stream_name}. The incremental sync will assume it needs to be synced"
    )
    self._should_be_synced_logger_triggered = True
@@ -67,6 +67,7 @@ class DefaultStream(AbstractStream):
67
67
  name=self.name,
68
68
  json_schema=dict(self._json_schema),
69
69
  supported_sync_modes=[SyncMode.full_refresh],
70
+ is_resumable=False,
70
71
  )
71
72
 
72
73
  if self._namespace:
@@ -5,7 +5,7 @@
5
5
  from abc import ABC, abstractmethod
6
6
  from typing import Any, Iterable, Mapping, Optional
7
7
 
8
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
8
+ from airbyte_cdk.sources.types import Record
9
9
 
10
10
 
11
11
  class Partition(ABC):
@@ -8,7 +8,7 @@ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentin
8
8
  PartitionGenerationCompletedSentinel,
9
9
  )
10
10
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
11
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
11
+ from airbyte_cdk.sources.types import Record
12
12
 
13
13
 
14
14
  class PartitionCompleteSentinel:
@@ -7,6 +7,7 @@ from datetime import datetime, timedelta, timezone
7
7
  from typing import Any, Callable, List, MutableMapping, Optional, Tuple
8
8
 
9
9
  import pendulum
10
+ from pendulum.datetime import DateTime
10
11
 
11
12
  # FIXME We would eventually like the Concurrent package to be agnostic of the declarative package. However, this is a breaking change and
12
13
  # the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest.
@@ -16,7 +17,6 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_sta
16
17
  AbstractStreamStateConverter,
17
18
  ConcurrencyCompatibleStateType,
18
19
  )
19
- from pendulum.datetime import DateTime
20
20
 
21
21
 
22
22
  class DateTimeStreamStateConverter(AbstractStreamStateConverter):