airbyte-cdk 6.7.0__py3-none-any.whl → 6.7.0.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +1 -2
  2. airbyte_cdk/config_observation.py +1 -2
  3. airbyte_cdk/connector.py +0 -1
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
  5. airbyte_cdk/connector_builder/main.py +1 -2
  6. airbyte_cdk/destinations/destination.py +1 -2
  7. airbyte_cdk/destinations/vector_db_based/config.py +1 -2
  8. airbyte_cdk/destinations/vector_db_based/document_processor.py +3 -4
  9. airbyte_cdk/destinations/vector_db_based/embedder.py +4 -5
  10. airbyte_cdk/entrypoint.py +2 -3
  11. airbyte_cdk/logger.py +1 -2
  12. airbyte_cdk/models/airbyte_protocol.py +1 -2
  13. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
  14. airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
  15. airbyte_cdk/sources/config.py +1 -2
  16. airbyte_cdk/sources/declarative/auth/jwt.py +0 -1
  17. airbyte_cdk/sources/declarative/auth/oauth.py +0 -1
  18. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +0 -1
  19. airbyte_cdk/sources/declarative/auth/token.py +1 -2
  20. airbyte_cdk/sources/declarative/auth/token_provider.py +2 -3
  21. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +4 -6
  22. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +13 -13
  23. airbyte_cdk/sources/declarative/decoders/json_decoder.py +2 -3
  24. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +0 -1
  25. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +0 -1
  26. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +0 -1
  27. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -1
  28. airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -1
  29. airbyte_cdk/sources/declarative/extractors/record_filter.py +48 -6
  30. airbyte_cdk/sources/declarative/extractors/record_selector.py +4 -32
  31. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -2
  32. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -2
  33. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +2 -5
  34. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +2 -5
  35. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +3 -1
  36. airbyte_cdk/sources/declarative/interpolation/jinja.py +4 -5
  37. airbyte_cdk/sources/declarative/manifest_declarative_source.py +3 -4
  38. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
  39. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +2 -7
  40. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -1
  41. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -1
  42. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -1
  43. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -1
  44. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -1
  45. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -1
  46. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -1
  47. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +0 -1
  48. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -1
  49. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +1 -2
  50. airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -1
  51. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -1
  52. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -1
  53. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -1
  54. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
  55. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -1
  56. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -1
  57. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -1
  58. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +3 -9
  59. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -2
  60. airbyte_cdk/sources/declarative/requesters/requester.py +0 -1
  61. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -2
  62. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -12
  63. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -7
  64. airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -1
  65. airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -1
  66. airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
  67. airbyte_cdk/sources/embedded/tools.py +0 -1
  68. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  69. airbyte_cdk/sources/file_based/config/avro_format.py +1 -2
  70. airbyte_cdk/sources/file_based/config/csv_format.py +1 -2
  71. airbyte_cdk/sources/file_based/config/excel_format.py +1 -2
  72. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +1 -2
  73. airbyte_cdk/sources/file_based/config/jsonl_format.py +1 -2
  74. airbyte_cdk/sources/file_based/config/parquet_format.py +1 -2
  75. airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -2
  76. airbyte_cdk/sources/file_based/file_based_source.py +1 -2
  77. airbyte_cdk/sources/file_based/file_based_stream_reader.py +1 -2
  78. airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -1
  79. airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -2
  80. airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
  81. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -2
  82. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +1 -2
  83. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +8 -9
  84. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +1 -2
  85. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +4 -5
  86. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
  87. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
  88. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
  89. airbyte_cdk/sources/http_logger.py +0 -1
  90. airbyte_cdk/sources/streams/call_rate.py +2 -1
  91. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +1 -2
  92. airbyte_cdk/sources/streams/concurrent/adapters.py +4 -8
  93. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +1 -2
  94. airbyte_cdk/sources/streams/concurrent/cursor.py +6 -30
  95. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
  96. airbyte_cdk/sources/streams/concurrent/partitions/record.py +35 -0
  97. airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
  98. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
  99. airbyte_cdk/sources/streams/core.py +1 -2
  100. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +1 -2
  101. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +0 -1
  102. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +0 -1
  103. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +1 -2
  104. airbyte_cdk/sources/streams/http/http.py +2 -3
  105. airbyte_cdk/sources/streams/http/http_client.py +2 -49
  106. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -2
  107. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -1
  108. airbyte_cdk/sources/types.py +1 -14
  109. airbyte_cdk/sources/utils/schema_helpers.py +2 -3
  110. airbyte_cdk/sql/secrets.py +1 -2
  111. airbyte_cdk/sql/shared/sql_processor.py +6 -8
  112. airbyte_cdk/test/entrypoint_wrapper.py +3 -4
  113. airbyte_cdk/test/mock_http/mocker.py +0 -1
  114. airbyte_cdk/utils/schema_inferrer.py +1 -2
  115. airbyte_cdk/utils/slice_hasher.py +1 -1
  116. airbyte_cdk/utils/traced_exception.py +1 -2
  117. {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/METADATA +2 -9
  118. {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/RECORD +121 -120
  119. {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/LICENSE.txt +0 -0
  120. {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/WHEEL +0 -0
  121. {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/entry_points.txt +0 -0
@@ -25,8 +25,6 @@ from datetime import datetime
25
25
  from pathlib import Path
26
26
  from typing import Any, cast
27
27
 
28
- from orjson import orjson
29
-
30
28
  from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
31
29
  from airbyte_cdk.models import (
32
30
  AirbyteErrorTraceMessage,
@@ -44,6 +42,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
44
42
  )
45
43
  from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
46
44
  from airbyte_cdk.sources.source import TState
45
+ from orjson import orjson
47
46
 
48
47
 
49
48
  class SourceLocalYaml(YamlDeclarativeSource):
@@ -10,8 +10,6 @@ import time
10
10
  from copy import copy
11
11
  from typing import Any, List, MutableMapping
12
12
 
13
- from orjson import orjson
14
-
15
13
  from airbyte_cdk.models import (
16
14
  AirbyteControlConnectorConfigMessage,
17
15
  AirbyteControlMessage,
@@ -20,6 +18,7 @@ from airbyte_cdk.models import (
20
18
  OrchestratorType,
21
19
  Type,
22
20
  )
21
+ from orjson import orjson
23
22
 
24
23
 
25
24
  class ObservedDict(dict): # type: ignore # disallow_any_generics is set to True, and dict is equivalent to dict[Any]
airbyte_cdk/connector.py CHANGED
@@ -11,7 +11,6 @@ from abc import ABC, abstractmethod
11
11
  from typing import Any, Generic, Mapping, Optional, Protocol, TypeVar
12
12
 
13
13
  import yaml
14
-
15
14
  from airbyte_cdk.models import (
16
15
  AirbyteConnectionStatus,
17
16
  ConnectorSpecification,
@@ -12,8 +12,8 @@ from airbyte_cdk.models import (
12
12
  AirbyteRecordMessage,
13
13
  AirbyteStateMessage,
14
14
  ConfiguredAirbyteCatalog,
15
- Type,
16
15
  )
16
+ from airbyte_cdk.models import Type
17
17
  from airbyte_cdk.models import Type as MessageType
18
18
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
19
19
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
@@ -6,8 +6,6 @@
6
6
  import sys
7
7
  from typing import Any, List, Mapping, Optional, Tuple
8
8
 
9
- from orjson import orjson
10
-
11
9
  from airbyte_cdk.connector import BaseConnector
12
10
  from airbyte_cdk.connector_builder.connector_builder_handler import (
13
11
  TestReadLimits,
@@ -27,6 +25,7 @@ from airbyte_cdk.models import (
27
25
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
28
26
  from airbyte_cdk.sources.source import Source
29
27
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
28
+ from orjson import orjson
30
29
 
31
30
 
32
31
  def get_config_and_catalog_from_args(
@@ -9,8 +9,6 @@ import sys
9
9
  from abc import ABC, abstractmethod
10
10
  from typing import Any, Iterable, List, Mapping
11
11
 
12
- from orjson import orjson
13
-
14
12
  from airbyte_cdk.connector import Connector
15
13
  from airbyte_cdk.exception_handler import init_uncaught_exception_handler
16
14
  from airbyte_cdk.models import (
@@ -22,6 +20,7 @@ from airbyte_cdk.models import (
22
20
  )
23
21
  from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit
24
22
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
23
+ from orjson import orjson
25
24
 
26
25
  logger = logging.getLogger("airbyte")
27
26
 
@@ -5,10 +5,9 @@
5
5
  from typing import Any, Dict, List, Literal, Optional, Union
6
6
 
7
7
  import dpath
8
- from pydantic.v1 import BaseModel, Field
9
-
10
8
  from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
11
9
  from airbyte_cdk.utils.spec_schema_transformations import resolve_refs
10
+ from pydantic.v1 import BaseModel, Field
12
11
 
13
12
 
14
13
  class SeparatorSplitterConfigModel(BaseModel):
@@ -8,10 +8,6 @@ from dataclasses import dataclass
8
8
  from typing import Any, Dict, List, Mapping, Optional, Tuple
9
9
 
10
10
  import dpath
11
- from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
12
- from langchain.utils import stringify_dict
13
- from langchain_core.documents.base import Document
14
-
15
11
  from airbyte_cdk.destinations.vector_db_based.config import (
16
12
  ProcessingConfigModel,
17
13
  SeparatorSplitterConfigModel,
@@ -25,6 +21,9 @@ from airbyte_cdk.models import (
25
21
  DestinationSyncMode,
26
22
  )
27
23
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
24
+ from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
25
+ from langchain.utils import stringify_dict
26
+ from langchain_core.documents.base import Document
28
27
 
29
28
  METADATA_STREAM_FIELD = "_ab_stream"
30
29
  METADATA_RECORD_ID_FIELD = "_ab_record_id"
@@ -7,11 +7,6 @@ from abc import ABC, abstractmethod
7
7
  from dataclasses import dataclass
8
8
  from typing import List, Optional, Union, cast
9
9
 
10
- from langchain.embeddings.cohere import CohereEmbeddings
11
- from langchain.embeddings.fake import FakeEmbeddings
12
- from langchain.embeddings.localai import LocalAIEmbeddings
13
- from langchain.embeddings.openai import OpenAIEmbeddings
14
-
15
10
  from airbyte_cdk.destinations.vector_db_based.config import (
16
11
  AzureOpenAIEmbeddingConfigModel,
17
12
  CohereEmbeddingConfigModel,
@@ -24,6 +19,10 @@ from airbyte_cdk.destinations.vector_db_based.config import (
24
19
  from airbyte_cdk.destinations.vector_db_based.utils import create_chunks, format_exception
25
20
  from airbyte_cdk.models import AirbyteRecordMessage
26
21
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
22
+ from langchain.embeddings.cohere import CohereEmbeddings
23
+ from langchain.embeddings.fake import FakeEmbeddings
24
+ from langchain.embeddings.localai import LocalAIEmbeddings
25
+ from langchain.embeddings.openai import OpenAIEmbeddings
27
26
 
28
27
 
29
28
  @dataclass
airbyte_cdk/entrypoint.py CHANGED
@@ -16,9 +16,6 @@ from typing import Any, DefaultDict, Iterable, List, Mapping, Optional
16
16
  from urllib.parse import urlparse
17
17
 
18
18
  import requests
19
- from orjson import orjson
20
- from requests import PreparedRequest, Response, Session
21
-
22
19
  from airbyte_cdk.connector import TConfig
23
20
  from airbyte_cdk.exception_handler import init_uncaught_exception_handler
24
21
  from airbyte_cdk.logger import init_logger
@@ -41,6 +38,8 @@ from airbyte_cdk.utils import is_cloud_environment, message_utils
41
38
  from airbyte_cdk.utils.airbyte_secrets_utils import get_secrets, update_secrets
42
39
  from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
43
40
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
41
+ from orjson import orjson
42
+ from requests import PreparedRequest, Response, Session
44
43
 
45
44
  logger = init_logger("airbyte")
46
45
 
airbyte_cdk/logger.py CHANGED
@@ -7,8 +7,6 @@ import logging
7
7
  import logging.config
8
8
  from typing import Any, Callable, Mapping, Optional, Tuple
9
9
 
10
- from orjson import orjson
11
-
12
10
  from airbyte_cdk.models import (
13
11
  AirbyteLogMessage,
14
12
  AirbyteMessage,
@@ -17,6 +15,7 @@ from airbyte_cdk.models import (
17
15
  Type,
18
16
  )
19
17
  from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
18
+ from orjson import orjson
20
19
 
21
20
  LOGGING_CONFIG = {
22
21
  "version": 1,
@@ -5,11 +5,10 @@
5
5
  from dataclasses import InitVar, dataclass
6
6
  from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
7
7
 
8
+ from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
8
9
  from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
9
10
  from serpyco_rs.metadata import Alias
10
11
 
11
- from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
12
-
13
12
  # ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
14
13
 
15
14
 
@@ -17,8 +17,8 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
17
17
  from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
18
18
  from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
19
19
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
20
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
20
21
  from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
21
- from airbyte_cdk.sources.types import Record
22
22
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
23
23
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
24
24
  from airbyte_cdk.utils import AirbyteTracedException
@@ -147,11 +147,11 @@ class ConcurrentReadProcessor:
147
147
  # AbstractStreams are expected to return data as they are expected.
148
148
  # Any transformation on the data should be done before reaching this point
149
149
  message = stream_data_to_airbyte_message(
150
- stream_name=record.stream_name,
150
+ stream_name=record.partition.stream_name(),
151
151
  data_or_message=record.data,
152
152
  is_file_transfer_message=record.is_file_transfer_message,
153
153
  )
154
- stream = self._stream_name_to_instance[record.stream_name]
154
+ stream = self._stream_name_to_instance[record.partition.stream_name()]
155
155
 
156
156
  if message.type == MessageType.RECORD:
157
157
  if self._record_counter[stream.name] == 0:
@@ -18,11 +18,11 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
18
18
  from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
19
19
  from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
20
20
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
21
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
21
22
  from airbyte_cdk.sources.streams.concurrent.partitions.types import (
22
23
  PartitionCompleteSentinel,
23
24
  QueueItem,
24
25
  )
25
- from airbyte_cdk.sources.types import Record
26
26
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
27
27
 
28
28
 
@@ -4,9 +4,8 @@
4
4
 
5
5
  from typing import Any, Dict
6
6
 
7
- from pydantic.v1 import BaseModel
8
-
9
7
  from airbyte_cdk.sources.utils.schema_helpers import expand_refs, rename_key
8
+ from pydantic.v1 import BaseModel
10
9
 
11
10
 
12
11
  class BaseConfig(BaseModel):
@@ -8,7 +8,6 @@ from datetime import datetime
8
8
  from typing import Any, Mapping, Optional, Union
9
9
 
10
10
  import jwt
11
-
12
11
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
13
12
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
14
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, List, Mapping, Optional, Union
7
7
 
8
8
  import pendulum
9
-
10
9
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
11
10
  from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
12
11
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -6,7 +6,6 @@ from dataclasses import dataclass
6
6
  from typing import Any, List, Mapping
7
7
 
8
8
  import dpath
9
-
10
9
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
11
10
 
12
11
 
@@ -8,8 +8,6 @@ from dataclasses import InitVar, dataclass
8
8
  from typing import Any, Mapping, Union
9
9
 
10
10
  import requests
11
- from cachetools import TTLCache, cached
12
-
13
11
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
14
12
  from airbyte_cdk.sources.declarative.auth.token_provider import TokenProvider
15
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -18,6 +16,7 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
18
16
  RequestOptionType,
19
17
  )
20
18
  from airbyte_cdk.sources.types import Config
19
+ from cachetools import TTLCache, cached
21
20
 
22
21
 
23
22
  @dataclass
@@ -10,9 +10,6 @@ from typing import Any, List, Mapping, Optional, Union
10
10
 
11
11
  import dpath
12
12
  import pendulum
13
- from isodate import Duration
14
- from pendulum import DateTime
15
-
16
13
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
17
14
  from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
18
15
  from airbyte_cdk.sources.declarative.exceptions import ReadException
@@ -21,6 +18,8 @@ from airbyte_cdk.sources.declarative.requesters.requester import Requester
21
18
  from airbyte_cdk.sources.http_logger import format_http_message
22
19
  from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
23
20
  from airbyte_cdk.sources.types import Config
21
+ from isodate import Duration
22
+ from pendulum import DateTime
24
23
 
25
24
 
26
25
  class TokenProvider:
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  import logging
6
- from typing import Any, Callable, Generic, Iterator, List, Mapping, Optional, Tuple, Union
6
+ from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union, Callable
7
7
 
8
8
  from airbyte_cdk.models import (
9
9
  AirbyteCatalog,
@@ -27,16 +27,14 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
27
27
  )
28
28
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
29
29
  DatetimeBasedCursor as DatetimeBasedCursorModel,
30
- )
31
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
32
30
  DeclarativeStream as DeclarativeStreamModel,
33
31
  )
34
32
  from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
35
- ComponentDefinition,
36
33
  ModelToComponentFactory,
34
+ ComponentDefinition,
37
35
  )
38
36
  from airbyte_cdk.sources.declarative.requesters import HttpRequester
39
- from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
37
+ from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, Retriever
40
38
  from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
41
39
  DeclarativePartitionFactory,
42
40
  StreamSlicerPartitionGenerator,
@@ -44,6 +42,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_genera
44
42
  from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
45
43
  from airbyte_cdk.sources.declarative.types import ConnectionDefinition
46
44
  from airbyte_cdk.sources.source import TState
45
+ from airbyte_cdk.sources.types import Config, StreamState
47
46
  from airbyte_cdk.sources.streams import Stream
48
47
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
49
48
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
@@ -51,7 +50,6 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
51
50
  )
52
51
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
53
52
  from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
54
- from airbyte_cdk.sources.types import Config, StreamState
55
53
 
56
54
 
57
55
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -2790,21 +2790,21 @@ interpolation:
2790
2790
  - created_at: "2020-01-01 00:00:00.000+00:00"
2791
2791
  - updated_at: "2020-01-02 00:00:00.000+00:00"
2792
2792
  macros:
2793
- - title: now_utc
2793
+ - title: Now (UTC)
2794
2794
  description: Returns the current date and time in the UTC timezone.
2795
2795
  arguments: {}
2796
2796
  return_type: Datetime
2797
2797
  examples:
2798
2798
  - "'{{ now_utc() }}' -> '2021-09-01 00:00:00+00:00'"
2799
2799
  - "'{{ now_utc().strftime('%Y-%m-%d') }}' -> '2021-09-01'"
2800
- - title: today_utc
2800
+ - title: Today (UTC)
2801
2801
  description: Returns the current date in UTC timezone. The output is a date object.
2802
2802
  arguments: {}
2803
2803
  return_type: Date
2804
2804
  examples:
2805
2805
  - "'{{ today_utc() }}' -> '2021-09-01'"
2806
2806
  - "'{{ today_utc().strftime('%Y/%m/%d')}}' -> '2021/09/01'"
2807
- - title: timestamp
2807
+ - title: Timestamp
2808
2808
  description: Converts a number or a string representing a datetime (formatted as ISO8601) to a timestamp. If the input is a number, it is converted to an int. If no timezone is specified, the string is interpreted as UTC.
2809
2809
  arguments:
2810
2810
  datetime: A string formatted as ISO8601 or an integer representing a unix timestamp
@@ -2815,7 +2815,7 @@ interpolation:
2815
2815
  - "'{{ timestamp('2022-02-28T00:00:00Z') }}' -> 1646006400"
2816
2816
  - "'{{ timestamp('2022-02-28 00:00:00Z') }}' -> 1646006400"
2817
2817
  - "'{{ timestamp('2022-02-28T00:00:00-08:00') }}' -> 1646035200"
2818
- - title: max
2818
+ - title: Max
2819
2819
  description: Returns the largest item of an iterable, or the largest of two or more arguments.
2820
2820
  arguments:
2821
2821
  args: iterable or a sequence of two or more arguments
@@ -2823,7 +2823,7 @@ interpolation:
2823
2823
  examples:
2824
2824
  - "'{{ max(2, 3) }}' -> 3"
2825
2825
  - "'{{ max([2, 3]) }}' -> 3"
2826
- - title: day_delta
2826
+ - title: Day Delta
2827
2827
  description: Returns the datetime of now() + num_days.
2828
2828
  arguments:
2829
2829
  num_days: The number of days to add to now
@@ -2833,8 +2833,8 @@ interpolation:
2833
2833
  - "'{{ day_delta(1) }}' -> '2021-09-02T00:00:00.000000+0000'"
2834
2834
  - "'{{ day_delta(-1) }}' -> '2021-08-31T00:00:00.000000+0000'"
2835
2835
  - "'{{ day_delta(25, format='%Y-%m-%d') }}' -> '2021-09-26'"
2836
- - title: duration
2837
- description: Converts an ISO8601 duration to datetime timedelta.
2836
+ - title: Duration
2837
+ description: Converts an ISO8601 duration to datetime.timedelta.
2838
2838
  arguments:
2839
2839
  duration_string: "A string representing an ISO8601 duration. See https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm for more details."
2840
2840
  return_type: datetime.timedelta
@@ -2842,7 +2842,7 @@ interpolation:
2842
2842
  - "'{{ duration('P1D') }}' -> '1 day, 0:00:00'"
2843
2843
  - "'{{ duration('P6DT23H') }}' -> '6 days, 23:00:00'"
2844
2844
  - "'{{ (now_utc() - duration('P1D')).strftime('%Y-%m-%dT%H:%M:%SZ') }}' -> '2021-08-31T00:00:00Z'"
2845
- - title: format_datetime
2845
+ - title: Format Datetime
2846
2846
  description: Converts a datetime or a datetime-string to the specified format.
2847
2847
  arguments:
2848
2848
  datetime: The datetime object or a string to convert. If datetime is a string, it must be formatted as ISO8601.
@@ -2854,7 +2854,7 @@ interpolation:
2854
2854
  - "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}"
2855
2855
  - "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}"
2856
2856
  filters:
2857
- - title: hash
2857
+ - title: Hash
2858
2858
  description: Convert the specified value to a hashed string.
2859
2859
  arguments:
2860
2860
  hash_type: Hash algorithm to use for the conversion ('md5' by default).
@@ -2864,26 +2864,26 @@ interpolation:
2864
2864
  - "{{ 'Test client_secret' | hash() }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
2865
2865
  - "{{ 'Test client_secret' | hash('md5') }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
2866
2866
  - "{{ 'Test client_secret' | hash('md5', salt='salt') }} -> '5011a0168579c2d94cbbe1c6ad14327c'"
2867
- - title: base64encode
2867
+ - title: Base64 encoder
2868
2868
  description: Convert the specified value to a string in the base64 format.
2869
2869
  arguments: {}
2870
2870
  return_type: str
2871
2871
  examples:
2872
2872
  - "{{ 'Test client_secret' | base64encode }} -> 'VGVzdCBjbGllbnRfc2VjcmV0'"
2873
- - title: base64decode
2873
+ - title: Base64 decoder
2874
2874
  description: Decodes the specified base64 format value into a common string.
2875
2875
  arguments: {}
2876
2876
  return_type: str
2877
2877
  examples:
2878
2878
  - "{{ 'ZmFrZSByZWZyZXNoX3Rva2VuIHZhbHVl' | base64decode }} -> 'fake refresh_token value'"
2879
- - title: string
2879
+ - title: String
2880
2880
  description: Converts the specified value to a string.
2881
2881
  arguments: {}
2882
2882
  return_type: str
2883
2883
  examples:
2884
2884
  - '{{ 1 | string }} -> "1"'
2885
2885
  - '{{ ["hello", "world"] | string }} -> "["hello", "world"]"'
2886
- - title: regex_search
2886
+ - title: Regex Search
2887
2887
  description: Match the input string against a regular expression and return the first match.
2888
2888
  arguments:
2889
2889
  regex: The regular expression to search for. It must include a capture group.
@@ -5,12 +5,11 @@ import codecs
5
5
  import logging
6
6
  from dataclasses import InitVar, dataclass
7
7
  from gzip import decompress
8
- from typing import Any, Generator, List, Mapping, MutableMapping, Optional
8
+ from typing import Any, Generator, Mapping, MutableMapping, List, Optional
9
9
 
10
- import orjson
11
10
  import requests
12
-
13
11
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
12
+ import orjson
14
13
 
15
14
  logger = logging.getLogger("airbyte")
16
15
 
@@ -4,7 +4,6 @@ import logging
4
4
  from typing import Any, Generator, Mapping
5
5
 
6
6
  import requests
7
-
8
7
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
9
8
 
10
9
  logger = logging.getLogger("airbyte")
@@ -7,7 +7,6 @@ from dataclasses import dataclass
7
7
  from typing import Any, Generator, MutableMapping
8
8
 
9
9
  import requests
10
-
11
10
  from airbyte_cdk.sources.declarative.decoders import Decoder
12
11
 
13
12
  logger = logging.getLogger("airbyte")
@@ -9,7 +9,6 @@ from xml.parsers.expat import ExpatError
9
9
 
10
10
  import requests
11
11
  import xmltodict
12
-
13
12
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
14
13
 
15
14
  logger = logging.getLogger("airbyte")
@@ -7,7 +7,6 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Union
7
7
 
8
8
  import dpath
9
9
  import requests
10
-
11
10
  from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder
12
11
  from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
13
12
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -6,7 +6,6 @@ from abc import abstractmethod
6
6
  from typing import Any, Iterable, Mapping, Optional
7
7
 
8
8
  import requests
9
-
10
9
  from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
11
10
 
12
11
 
@@ -1,6 +1,7 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
+ import datetime
4
5
  from dataclasses import InitVar, dataclass
5
6
  from typing import Any, Iterable, Mapping, Optional, Union
6
7
 
@@ -10,7 +11,7 @@ from airbyte_cdk.sources.declarative.incremental import (
10
11
  PerPartitionWithGlobalCursor,
11
12
  )
12
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
13
- from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
14
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
14
15
 
15
16
 
16
17
  @dataclass
@@ -67,6 +68,20 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
67
68
  self._date_time_based_cursor = date_time_based_cursor
68
69
  self._substream_cursor = substream_cursor
69
70
 
71
+ @property
72
+ def _cursor_field(self) -> str:
73
+ return self._date_time_based_cursor.cursor_field.eval(self._date_time_based_cursor.config) # type: ignore # eval returns a string in this context
74
+
75
+ @property
76
+ def _start_date_from_config(self) -> datetime.datetime:
77
+ return self._date_time_based_cursor._start_datetime.get_datetime(
78
+ self._date_time_based_cursor.config
79
+ )
80
+
81
+ @property
82
+ def _end_datetime(self) -> datetime.datetime:
83
+ return self._date_time_based_cursor.select_best_end_datetime()
84
+
70
85
  def filter_records(
71
86
  self,
72
87
  records: Iterable[Mapping[str, Any]],
@@ -74,14 +89,16 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
74
89
  stream_slice: Optional[StreamSlice] = None,
75
90
  next_page_token: Optional[Mapping[str, Any]] = None,
76
91
  ) -> Iterable[Mapping[str, Any]]:
92
+ state_value = self._get_state_value(
93
+ stream_state, stream_slice or StreamSlice(partition={}, cursor_slice={})
94
+ )
95
+ filter_date: datetime.datetime = self._get_filter_date(state_value)
77
96
  records = (
78
97
  record
79
98
  for record in records
80
- if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
81
- # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
82
- # Record stream name is empty cause it is not used durig the filtering
83
- Record(data=record, associated_slice=stream_slice, stream_name="")
84
- )
99
+ if self._end_datetime
100
+ >= self._date_time_based_cursor.parse_date(record[self._cursor_field])
101
+ >= filter_date
85
102
  )
86
103
  if self.condition:
87
104
  records = super().filter_records(
@@ -91,3 +108,28 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
91
108
  next_page_token=next_page_token,
92
109
  )
93
110
  yield from records
111
+
112
+ def _get_state_value(
113
+ self, stream_state: StreamState, stream_slice: StreamSlice
114
+ ) -> Optional[str]:
115
+ """
116
+ Return cursor_value or None in case it was not found.
117
+ Cursor_value may be empty if:
118
+ 1. It is an initial sync => no stream_state exist at all.
119
+ 2. In Parent-child stream, and we already make initial sync, so stream_state is present.
120
+ During the second read, we receive one extra record from parent and therefore no stream_state for this record will be found.
121
+
122
+ :param StreamState stream_state: State
123
+ :param StreamSlice stream_slice: Current Stream slice
124
+ :return Optional[str]: cursor_value in case it was found, otherwise None.
125
+ """
126
+ state = (self._substream_cursor or self._date_time_based_cursor).select_state(stream_slice)
127
+
128
+ return state.get(self._cursor_field) if state else None
129
+
130
+ def _get_filter_date(self, state_value: Optional[str]) -> datetime.datetime:
131
+ start_date_parsed = self._start_date_from_config
132
+ if state_value:
133
+ return max(start_date_parsed, self._date_time_based_cursor.parse_date(state_value))
134
+ else:
135
+ return start_date_parsed