airbyte-cdk 6.7.1rc3__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (123)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +2 -1
  2. airbyte_cdk/config_observation.py +2 -1
  3. airbyte_cdk/connector.py +1 -0
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
  5. airbyte_cdk/connector_builder/main.py +2 -1
  6. airbyte_cdk/destinations/destination.py +2 -1
  7. airbyte_cdk/destinations/vector_db_based/config.py +2 -1
  8. airbyte_cdk/destinations/vector_db_based/document_processor.py +4 -3
  9. airbyte_cdk/destinations/vector_db_based/embedder.py +5 -4
  10. airbyte_cdk/entrypoint.py +3 -2
  11. airbyte_cdk/logger.py +2 -1
  12. airbyte_cdk/models/__init__.py +2 -0
  13. airbyte_cdk/models/airbyte_protocol.py +2 -1
  14. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
  15. airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
  16. airbyte_cdk/sources/config.py +2 -1
  17. airbyte_cdk/sources/declarative/auth/jwt.py +1 -0
  18. airbyte_cdk/sources/declarative/auth/oauth.py +1 -0
  19. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -0
  20. airbyte_cdk/sources/declarative/auth/token.py +2 -1
  21. airbyte_cdk/sources/declarative/auth/token_provider.py +3 -2
  22. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +6 -4
  23. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +196 -0
  24. airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -2
  25. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -0
  26. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +1 -0
  27. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +1 -0
  28. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +1 -0
  29. airbyte_cdk/sources/declarative/extractors/http_selector.py +1 -0
  30. airbyte_cdk/sources/declarative/extractors/record_filter.py +6 -48
  31. airbyte_cdk/sources/declarative/extractors/record_selector.py +32 -4
  32. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +7 -2
  33. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +2 -1
  34. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +5 -2
  35. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +5 -2
  36. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +1 -3
  37. airbyte_cdk/sources/declarative/interpolation/jinja.py +5 -4
  38. airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -3
  39. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
  40. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +144 -0
  41. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +41 -4
  42. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -0
  43. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +1 -0
  44. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +1 -0
  45. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +1 -0
  46. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +1 -0
  47. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +1 -0
  48. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -0
  49. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +1 -0
  50. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +1 -0
  51. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -2
  52. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -0
  53. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -0
  54. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +1 -0
  55. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +1 -0
  56. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +1 -0
  57. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +1 -0
  58. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +1 -0
  59. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +1 -0
  60. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +9 -3
  61. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -1
  62. airbyte_cdk/sources/declarative/requesters/requester.py +1 -0
  63. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -1
  64. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +12 -4
  65. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -4
  66. airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -0
  67. airbyte_cdk/sources/declarative/transformations/remove_fields.py +1 -0
  68. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -0
  69. airbyte_cdk/sources/embedded/tools.py +1 -0
  70. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  71. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  72. airbyte_cdk/sources/file_based/config/csv_format.py +2 -1
  73. airbyte_cdk/sources/file_based/config/excel_format.py +2 -1
  74. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -1
  75. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  76. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  77. airbyte_cdk/sources/file_based/config/unstructured_format.py +2 -1
  78. airbyte_cdk/sources/file_based/file_based_source.py +2 -1
  79. airbyte_cdk/sources/file_based/file_based_stream_reader.py +2 -1
  80. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -0
  81. airbyte_cdk/sources/file_based/file_types/csv_parser.py +2 -1
  82. airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
  83. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +2 -1
  84. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -1
  85. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +9 -8
  86. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -1
  87. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +5 -4
  88. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
  89. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
  90. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
  91. airbyte_cdk/sources/http_logger.py +1 -0
  92. airbyte_cdk/sources/streams/call_rate.py +1 -2
  93. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -1
  94. airbyte_cdk/sources/streams/concurrent/adapters.py +8 -4
  95. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +2 -1
  96. airbyte_cdk/sources/streams/concurrent/cursor.py +30 -6
  97. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
  98. airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
  99. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
  100. airbyte_cdk/sources/streams/core.py +2 -1
  101. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +2 -1
  102. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +1 -0
  103. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +1 -0
  104. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +2 -1
  105. airbyte_cdk/sources/streams/http/http.py +3 -2
  106. airbyte_cdk/sources/streams/http/http_client.py +49 -2
  107. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +2 -1
  108. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +1 -0
  109. airbyte_cdk/sources/types.py +14 -1
  110. airbyte_cdk/sources/utils/schema_helpers.py +3 -2
  111. airbyte_cdk/sql/secrets.py +2 -1
  112. airbyte_cdk/sql/shared/sql_processor.py +8 -6
  113. airbyte_cdk/test/entrypoint_wrapper.py +4 -3
  114. airbyte_cdk/test/mock_http/mocker.py +1 -0
  115. airbyte_cdk/utils/schema_inferrer.py +2 -1
  116. airbyte_cdk/utils/slice_hasher.py +1 -1
  117. airbyte_cdk/utils/traced_exception.py +2 -1
  118. {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/METADATA +9 -2
  119. {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/RECORD +122 -123
  120. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -35
  121. {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/LICENSE.txt +0 -0
  122. {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/WHEEL +0 -0
  123. {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/entry_points.txt +0 -0

airbyte_cdk/sources/declarative/extractors/record_filter.py
@@ -1,7 +1,6 @@
  #
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #
- import datetime
  from dataclasses import InitVar, dataclass
  from typing import Any, Iterable, Mapping, Optional, Union
 
@@ -11,7 +10,7 @@ from airbyte_cdk.sources.declarative.incremental import (
  PerPartitionWithGlobalCursor,
  )
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
- from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+ from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
 
 
  @dataclass
@@ -68,20 +67,6 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
  self._date_time_based_cursor = date_time_based_cursor
  self._substream_cursor = substream_cursor
 
- @property
- def _cursor_field(self) -> str:
- return self._date_time_based_cursor.cursor_field.eval(self._date_time_based_cursor.config) # type: ignore # eval returns a string in this context
-
- @property
- def _start_date_from_config(self) -> datetime.datetime:
- return self._date_time_based_cursor._start_datetime.get_datetime(
- self._date_time_based_cursor.config
- )
-
- @property
- def _end_datetime(self) -> datetime.datetime:
- return self._date_time_based_cursor.select_best_end_datetime()
-
  def filter_records(
  self,
  records: Iterable[Mapping[str, Any]],
@@ -89,16 +74,14 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
  stream_slice: Optional[StreamSlice] = None,
  next_page_token: Optional[Mapping[str, Any]] = None,
  ) -> Iterable[Mapping[str, Any]]:
- state_value = self._get_state_value(
- stream_state, stream_slice or StreamSlice(partition={}, cursor_slice={})
- )
- filter_date: datetime.datetime = self._get_filter_date(state_value)
  records = (
  record
  for record in records
- if self._end_datetime
- >= self._date_time_based_cursor.parse_date(record[self._cursor_field])
- >= filter_date
+ if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
+ # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
+ # Record stream name is empty cause it is not used durig the filtering
+ Record(data=record, associated_slice=stream_slice, stream_name="")
+ )
  )
  if self.condition:
  records = super().filter_records(
@@ -108,28 +91,3 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
  next_page_token=next_page_token,
  )
  yield from records
-
- def _get_state_value(
- self, stream_state: StreamState, stream_slice: StreamSlice
- ) -> Optional[str]:
- """
- Return cursor_value or None in case it was not found.
- Cursor_value may be empty if:
- 1. It is an initial sync => no stream_state exist at all.
- 2. In Parent-child stream, and we already make initial sync, so stream_state is present.
- During the second read, we receive one extra record from parent and therefore no stream_state for this record will be found.
-
- :param StreamState stream_state: State
- :param StreamSlice stream_slice: Current Stream slice
- :return Optional[str]: cursor_value in case it was found, otherwise None.
- """
- state = (self._substream_cursor or self._date_time_based_cursor).select_state(stream_slice)
-
- return state.get(self._cursor_field) if state else None
-
- def _get_filter_date(self, state_value: Optional[str]) -> datetime.datetime:
- start_date_parsed = self._start_date_from_config
- if state_value:
- return max(start_date_parsed, self._date_time_based_cursor.parse_date(state_value))
- else:
- return start_date_parsed
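
Note: the hunk above removes the decorator's own start/end-date arithmetic and instead delegates each decision to the active cursor's should_be_synced, wrapping every raw mapping in a Record on the fly. A minimal sketch of that call shape, using a hypothetical stub cursor (not a CDK class) and illustrative field values:

    from airbyte_cdk.sources.types import Record, StreamSlice

    class _StubCursor:
        """Hypothetical cursor: keep records whose cursor value is on or after a fixed date."""
        def should_be_synced(self, record: Record) -> bool:
            return record.data.get("updated_at", "") >= "2024-01-01"

    cursor = _StubCursor()
    stream_slice = StreamSlice(partition={}, cursor_slice={})
    raw_records = [{"id": 1, "updated_at": "2023-12-31"}, {"id": 2, "updated_at": "2024-06-01"}]

    # Mirrors the new filter_records body: wrap each mapping in a Record and let the cursor decide.
    kept = [
        r
        for r in raw_records
        if cursor.should_be_synced(Record(data=r, associated_slice=stream_slice, stream_name=""))
    ]
    print(kept)  # only the record with id == 2 passes the filter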

airbyte_cdk/sources/declarative/extractors/record_selector.py
@@ -3,12 +3,14 @@
  #
 
  from dataclasses import InitVar, dataclass, field
- from typing import Any, Iterable, List, Mapping, Optional
+ from typing import Any, Iterable, List, Mapping, Optional, Union
 
  import requests
+
  from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
  from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
  from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
  from airbyte_cdk.sources.declarative.models import SchemaNormalization
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
@@ -37,11 +39,34 @@ class RecordSelector(HttpSelector):
  config: Config
  parameters: InitVar[Mapping[str, Any]]
  schema_normalization: TypeTransformer
+ name: str
+ _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
  record_filter: Optional[RecordFilter] = None
  transformations: List[RecordTransformation] = field(default_factory=lambda: [])
 
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
  self._parameters = parameters
+ self._name = (
+ InterpolatedString(self._name, parameters=parameters)
+ if isinstance(self._name, str)
+ else self._name
+ )
+
+ @property # type: ignore
+ def name(self) -> str:
+ """
+ :return: Stream name
+ """
+ return (
+ str(self._name.eval(self.config))
+ if isinstance(self._name, InterpolatedString)
+ else self._name
+ )
+
+ @name.setter
+ def name(self, value: str) -> None:
+ if not isinstance(value, property):
+ self._name = value
 
  def select_records(
  self,
@@ -85,7 +110,7 @@ class RecordSelector(HttpSelector):
  transformed_data = self._transform(filtered_data, stream_state, stream_slice)
  normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
  for data in normalized_data:
- yield Record(data, stream_slice)
+ yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
 
  def _normalize_by_schema(
  self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
@@ -125,6 +150,9 @@ class RecordSelector(HttpSelector):
  for record in records:
  for transformation in self.transformations:
  transformation.transform(
- record, config=self.config, stream_state=stream_state, stream_slice=stream_slice
- ) # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
+ record, # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
+ config=self.config,
+ stream_state=stream_state,
+ stream_slice=stream_slice,
+ )
  yield record
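
Note: RecordSelector now takes a required name, which may be an interpolated string evaluated against the connector config, and every yielded Record is tagged with that stream name. A rough sketch of the interpolation behaviour the new property relies on (the config key below is illustrative):

    from airbyte_cdk.sources.declarative.interpolation import InterpolatedString

    config = {"entity": "orders"}
    # Same pattern as RecordSelector.__post_init__: a plain string gets wrapped in InterpolatedString.
    name = InterpolatedString("{{ config['entity'] }}_stream", parameters={})
    print(str(name.eval(config)))  # -> "orders_stream"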

airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py
@@ -6,18 +6,21 @@ import os
  import uuid
  import zlib
  from contextlib import closing
+ from dataclasses import InitVar, dataclass
  from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
 
  import pandas as pd
  import requests
- from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
  from numpy import nan
 
+ from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
+
  EMPTY_STR: str = ""
  DEFAULT_ENCODING: str = "utf-8"
  DOWNLOAD_CHUNK_SIZE: int = 1024 * 10
 
 
+ @dataclass
  class ResponseToFileExtractor(RecordExtractor):
  """
  This class is used when having very big HTTP responses (usually streamed) which would require too much memory so we use disk space as
@@ -27,7 +30,9 @@ class ResponseToFileExtractor(RecordExtractor):
  a first iteration so we will only support CSV parsing using pandas as salesforce and sendgrid were doing.
  """
 
- def __init__(self) -> None:
+ parameters: InitVar[Mapping[str, Any]]
+
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
  self.logger = logging.getLogger("airbyte")
 
  def _get_response_encoding(self, headers: Dict[str, Any]) -> str:
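
Note: because ResponseToFileExtractor is now a dataclass with a parameters InitVar, direct construction changes from a no-argument call to passing parameters, which is how the new factory method builds it. A small sketch of the before/after call sites:

    from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
        ResponseToFileExtractor,
    )

    # 6.7.1rc3 and earlier: plain __init__ with no arguments
    # extractor = ResponseToFileExtractor()

    # 6.7.2: dataclass-style construction, matching create_response_to_file_extractor
    extractor = ResponseToFileExtractor(parameters={})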

airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
@@ -7,6 +7,8 @@ from dataclasses import InitVar, dataclass, field
  from datetime import timedelta
  from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union
 
+ from isodate import Duration, duration_isoformat, parse_duration
+
  from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, Type
  from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
  from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
@@ -19,7 +21,6 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
  )
  from airbyte_cdk.sources.message import MessageRepository
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
- from isodate import Duration, duration_isoformat, parse_duration
 
 
  @dataclass

airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py
@@ -340,8 +340,11 @@ class GlobalSubstreamCursor(DeclarativeCursor):
  @staticmethod
  def _convert_record_to_cursor_record(record: Record) -> Record:
  return Record(
- record.data,
- StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
+ data=record.data,
+ stream_name=record.stream_name,
+ associated_slice=StreamSlice(
+ partition={}, cursor_slice=record.associated_slice.cursor_slice
+ )
  if record.associated_slice
  else None,
  )

airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
@@ -325,8 +325,11 @@ class PerPartitionCursor(DeclarativeCursor):
  @staticmethod
  def _convert_record_to_cursor_record(record: Record) -> Record:
  return Record(
- record.data,
- StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
+ data=record.data,
+ stream_name=record.stream_name,
+ associated_slice=StreamSlice(
+ partition={}, cursor_slice=record.associated_slice.cursor_slice
+ )
  if record.associated_slice
  else None,
  )
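
Note: both GlobalSubstreamCursor and PerPartitionCursor now rebuild the record with keyword arguments and carry the original stream_name through, stripping only the partition from the associated slice. A short sketch of the equivalent conversion (field values are illustrative):

    from airbyte_cdk.sources.types import Record, StreamSlice

    original = Record(
        data={"id": 7, "updated_at": "2024-06-01"},
        associated_slice=StreamSlice(partition={"parent_id": "42"}, cursor_slice={"start": "2024-01-01"}),
        stream_name="orders",
    )

    # Same shape as _convert_record_to_cursor_record: keep data and stream_name, drop the partition.
    converted = Record(
        data=original.data,
        stream_name=original.stream_name,
        associated_slice=StreamSlice(partition={}, cursor_slice=original.associated_slice.cursor_slice)
        if original.associated_slice
        else None,
    )
    print(converted.associated_slice.partition)  # -> {}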

airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py
@@ -194,9 +194,7 @@ class PerPartitionWithGlobalCursor(DeclarativeCursor):
  )
 
  def should_be_synced(self, record: Record) -> bool:
- return self._global_cursor.should_be_synced(
- record
- ) or self._per_partition_cursor.should_be_synced(record)
+ return self._get_active_cursor().should_be_synced(record)
 
  def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
  return self._global_cursor.is_greater_than_or_equal(first, second)

airbyte_cdk/sources/declarative/interpolation/jinja.py
@@ -6,15 +6,16 @@ import ast
  from functools import cache
  from typing import Any, Mapping, Optional, Tuple, Type
 
- from airbyte_cdk.sources.declarative.interpolation.filters import filters
- from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation
- from airbyte_cdk.sources.declarative.interpolation.macros import macros
- from airbyte_cdk.sources.types import Config
  from jinja2 import meta
  from jinja2.environment import Template
  from jinja2.exceptions import UndefinedError
  from jinja2.sandbox import SandboxedEnvironment
 
+ from airbyte_cdk.sources.declarative.interpolation.filters import filters
+ from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation
+ from airbyte_cdk.sources.declarative.interpolation.macros import macros
+ from airbyte_cdk.sources.types import Config
+
 
  class StreamPartitionAccessEnvironment(SandboxedEnvironment):
  """

airbyte_cdk/sources/declarative/manifest_declarative_source.py
@@ -8,9 +8,12 @@ import pkgutil
  from copy import deepcopy
  from importlib import metadata
  from typing import Any, Dict, Iterator, List, Mapping, Optional
- from packaging.version import Version, InvalidVersion
 
  import yaml
+ from jsonschema.exceptions import ValidationError
+ from jsonschema.validators import validate
+ from packaging.version import InvalidVersion, Version
+
  from airbyte_cdk.models import (
  AirbyteConnectionStatus,
  AirbyteMessage,
@@ -44,8 +47,6 @@ from airbyte_cdk.sources.utils.slice_logger import (
  DebugSliceLogger,
  SliceLogger,
  )
- from jsonschema.exceptions import ValidationError
- from jsonschema.validators import validate
 
 
  class ManifestDeclarativeSource(DeclarativeSource):

airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py
@@ -5,9 +5,9 @@ from typing import Any, Mapping
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
  from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
  from airbyte_cdk.sources.declarative.models import (
+ CustomIncrementalSync,
  DatetimeBasedCursor,
  SubstreamPartitionRouter,
- CustomIncrementalSync,
  )
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig
 

airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -567,6 +567,11 @@ class DpathExtractor(BaseModel):
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+ class ResponseToFileExtractor(BaseModel):
+ type: Literal["ResponseToFileExtractor"]
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
  class ExponentialBackoffStrategy(BaseModel):
  type: Literal["ExponentialBackoffStrategy"]
  factor: Optional[Union[float, str]] = Field(
@@ -748,6 +753,123 @@ class NoPagination(BaseModel):
  type: Literal["NoPagination"]
 
 
+ class State(BaseModel):
+ class Config:
+ extra = Extra.allow
+
+ min: int
+ max: int
+
+
+ class OauthConnectorInputSpecification(BaseModel):
+ class Config:
+ extra = Extra.allow
+
+ consent_url: str = Field(
+ ...,
+ description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
+ examples=[
+ {
+ "consent_url": "https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}"
+ },
+ {
+ "consent_url": "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}"
+ },
+ ],
+ title="DeclarativeOAuth Consent URL",
+ )
+ scope: Optional[str] = Field(
+ None,
+ description="The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.",
+ examples=[{"scope": "user:read user:read_orders workspaces:read"}],
+ title="(Optional) DeclarativeOAuth Scope",
+ )
+ access_token_url: str = Field(
+ ...,
+ description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
+ examples=[
+ {
+ "access_token_url": "https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
+ }
+ ],
+ title="DeclarativeOAuth Access Token URL",
+ )
+ access_token_headers: Optional[Dict[str, Any]] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
+ examples=[
+ {
+ "access_token_headers": {
+ "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"
+ }
+ }
+ ],
+ title="(Optional) DeclarativeOAuth Access Token Headers",
+ )
+ access_token_params: Optional[Dict[str, Any]] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
+ examples=[
+ {
+ "access_token_params": {
+ "{auth_code_key}": "{{auth_code_key}}",
+ "{client_id_key}": "{{client_id_key}}",
+ "{client_secret_key}": "{{client_secret_key}}",
+ }
+ }
+ ],
+ title="(Optional) DeclarativeOAuth Access Token Query Params (Json Encoded)",
+ )
+ extract_output: List[str] = Field(
+ ...,
+ description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. ",
+ examples=[{"extract_output": ["access_token", "refresh_token", "other_field"]}],
+ title="DeclarativeOAuth Extract Output",
+ )
+ state: Optional[State] = Field(
+ None,
+ description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity. ",
+ examples=[{"state": {"min": 7, "max": 128}}],
+ title="(Optional) DeclarativeOAuth Configurable State Query Param",
+ )
+ client_id_key: Optional[str] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.",
+ examples=[{"client_id_key": "my_custom_client_id_key_name"}],
+ title="(Optional) DeclarativeOAuth Client ID Key Override",
+ )
+ client_secret_key: Optional[str] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.",
+ examples=[{"client_secret_key": "my_custom_client_secret_key_name"}],
+ title="(Optional) DeclarativeOAuth Client Secret Key Override",
+ )
+ scope_key: Optional[str] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.",
+ examples=[{"scope_key": "my_custom_scope_key_key_name"}],
+ title="(Optional) DeclarativeOAuth Scope Key Override",
+ )
+ state_key: Optional[str] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. ",
+ examples=[{"state_key": "my_custom_state_key_key_name"}],
+ title="(Optional) DeclarativeOAuth State Key Override",
+ )
+ auth_code_key: Optional[str] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. ",
+ examples=[{"auth_code_key": "my_custom_auth_code_key_name"}],
+ title="(Optional) DeclarativeOAuth Auth Code Key Override",
+ )
+ redirect_uri_key: Optional[str] = Field(
+ None,
+ description="The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.",
+ examples=[{"redirect_uri_key": "my_custom_redirect_uri_key_name"}],
+ title="(Optional) DeclarativeOAuth Redirect URI Key Override",
+ )
+
+
  class OAuthConfigSpecification(BaseModel):
  class Config:
  extra = Extra.allow
@@ -766,6 +888,11 @@ class OAuthConfigSpecification(BaseModel):
  ],
  title="OAuth user input",
  )
+ oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
+ None,
+ description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
+ title="DeclarativeOAuth Connector Specification",
+ )
  complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
  None,
  description="OAuth specific blob. This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }",
@@ -1676,6 +1803,9 @@ class AsyncRetriever(BaseModel):
  ...,
  description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
  )
+ download_extractor: Optional[
+ Union[CustomRecordExtractor, DpathExtractor, ResponseToFileExtractor]
+ ] = Field(None, description="Responsible for fetching the records from provided urls.")
  creation_requester: Union[CustomRequester, HttpRequester] = Field(
  ...,
  description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.",
@@ -1726,6 +1856,20 @@ class AsyncRetriever(BaseModel):
  description="Component decoding the response so records can be extracted.",
  title="Decoder",
  )
+ download_decoder: Optional[
+ Union[
+ CustomDecoder,
+ JsonDecoder,
+ JsonlDecoder,
+ IterableDecoder,
+ XmlDecoder,
+ GzipJsonDecoder,
+ ]
+ ] = Field(
+ None,
+ description="Component decoding the download response so records can be extracted.",
+ title="Download Decoder",
+ )
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
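
Note: the new OauthConnectorInputSpecification Pydantic model validates the DeclarativeOAuth block of a spec; consent_url, access_token_url and extract_output are required, the remaining fields are optional overrides. A rough sketch of instantiating it directly, reusing placeholder URLs from the field examples above:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        OauthConnectorInputSpecification,
    )

    spec = OauthConnectorInputSpecification(
        consent_url=(
            "https://domain.host.com/marketing_api/auth"
            "?{client_id_key}={{client_id_key}}"
            "&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
            "&{state_key}={{state_key}}"
        ),
        access_token_url=(
            "https://auth.host.com/oauth2/token"
            "?{client_id_key}={{client_id_key}}"
            "&{client_secret_key}={{client_secret_key}}"
            "&{auth_code_key}={{auth_code_key}}"
        ),
        extract_output=["access_token", "refresh_token"],
    )
    print(spec.extract_output)  # ['access_token', 'refresh_token']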

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -25,6 +25,9 @@ from typing import (
  get_type_hints,
  )
 
+ from isodate import parse_duration
+ from pydantic.v1 import BaseModel
+
  from airbyte_cdk.models import FailureType, Level
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
@@ -267,6 +270,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
  RequestPath as RequestPathModel,
  )
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+ ResponseToFileExtractor as ResponseToFileExtractorModel,
+ )
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
  SelectiveAuthenticator as SelectiveAuthenticatorModel,
  )
@@ -367,8 +373,6 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
  from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
  from airbyte_cdk.sources.types import Config
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
- from isodate import parse_duration
- from pydantic.v1 import BaseModel
 
  ComponentDefinition = Mapping[str, Any]
 
@@ -426,6 +430,7 @@ class ModelToComponentFactory:
  DefaultErrorHandlerModel: self.create_default_error_handler,
  DefaultPaginatorModel: self.create_default_paginator,
  DpathExtractorModel: self.create_dpath_extractor,
+ ResponseToFileExtractorModel: self.create_response_to_file_extractor,
  ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
  SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
  HttpRequesterModel: self.create_http_requester,
@@ -1446,6 +1451,13 @@ class ModelToComponentFactory:
  parameters=model.parameters or {},
  )
 
+ def create_response_to_file_extractor(
+ self,
+ model: ResponseToFileExtractorModel,
+ **kwargs: Any,
+ ) -> ResponseToFileExtractor:
+ return ResponseToFileExtractor(parameters=model.parameters or {})
+
  @staticmethod
  def create_exponential_backoff_strategy(
  model: ExponentialBackoffStrategyModel, config: Config
@@ -1780,6 +1792,7 @@ class ModelToComponentFactory:
  self,
  model: RecordSelectorModel,
  config: Config,
+ name: str,
  *,
  transformations: List[RecordTransformation],
  decoder: Optional[Decoder] = None,
@@ -1810,6 +1823,7 @@ class ModelToComponentFactory:
 
  return RecordSelector(
  extractor=extractor,
+ name=name,
  config=config,
  record_filter=record_filter,
  transformations=transformations,
@@ -1880,6 +1894,7 @@ class ModelToComponentFactory:
  )
  record_selector = self._create_component_from_model(
  model=model.record_selector,
+ name=name,
  config=config,
  decoder=decoder,
  transformations=transformations,
@@ -2007,6 +2022,7 @@ class ModelToComponentFactory:
  model=model.record_selector,
  config=config,
  decoder=decoder,
+ name=name,
  transformations=transformations,
  client_side_incremental_sync=client_side_incremental_sync,
  )
@@ -2024,16 +2040,37 @@ class ModelToComponentFactory:
  name=f"job polling - {name}",
  )
  job_download_components_name = f"job download - {name}"
+ download_decoder = (
+ self._create_component_from_model(model=model.download_decoder, config=config)
+ if model.download_decoder
+ else JsonDecoder(parameters={})
+ )
+ download_extractor = (
+ self._create_component_from_model(
+ model=model.download_extractor,
+ config=config,
+ decoder=download_decoder,
+ parameters=model.parameters,
+ )
+ if model.download_extractor
+ else DpathExtractor(
+ [],
+ config=config,
+ decoder=download_decoder,
+ parameters=model.parameters or {},
+ )
+ )
  download_requester = self._create_component_from_model(
  model=model.download_requester,
- decoder=decoder,
+ decoder=download_decoder,
  config=config,
  name=job_download_components_name,
  )
  download_retriever = SimpleRetriever(
  requester=download_requester,
  record_selector=RecordSelector(
- extractor=ResponseToFileExtractor(),
+ extractor=download_extractor,
+ name=name,
  record_filter=None,
  transformations=[],
  schema_normalization=TypeTransformer(TransformConfig.NoTransform),
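
Note: for AsyncRetriever the factory now honours the optional download_decoder and download_extractor fields and, when a manifest omits them, falls back to a JsonDecoder and an empty-path DpathExtractor instead of always using ResponseToFileExtractor. A minimal sketch of that fallback outside the factory (the empty config dict is illustrative):

    from airbyte_cdk.sources.declarative.decoders import JsonDecoder
    from airbyte_cdk.sources.declarative.extractors import DpathExtractor

    config = {}  # illustrative connector config

    # What the factory builds when no download_decoder / download_extractor is declared.
    download_decoder = JsonDecoder(parameters={})
    download_extractor = DpathExtractor([], config=config, decoder=download_decoder, parameters={})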

airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
@@ -7,6 +7,7 @@ from dataclasses import InitVar, dataclass
  from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Union
 
  import dpath
+
  from airbyte_cdk.models import AirbyteMessage
  from airbyte_cdk.models import Type as MessageType
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString

airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
  from typing import Any, Mapping, Optional, Union
 
  import requests
+
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
  from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy
  from airbyte_cdk.sources.types import Config

airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
  from typing import Any, Mapping, Optional, Union
 
  import requests
+
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
  from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy
  from airbyte_cdk.sources.types import Config

airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py
@@ -7,6 +7,7 @@ from dataclasses import InitVar, dataclass
  from typing import Any, Mapping, Optional, Union
 
  import requests
+
  from airbyte_cdk.models import FailureType
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
  from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.header_helper import (

airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py
@@ -9,6 +9,7 @@ from dataclasses import InitVar, dataclass
  from typing import Any, Mapping, Optional, Union
 
  import requests
+
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
  from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.header_helper import (
  get_numeric_value_from_header,

airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
  from typing import Any, List, Mapping, Optional, Union
 
  import requests
+
  from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
  from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
  ErrorResolution,