airbyte-cdk 6.8.1rc9__py3-none-any.whl → 6.8.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +11 -5
- airbyte_cdk/config_observation.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -1
- airbyte_cdk/connector_builder/message_grouper.py +10 -10
- airbyte_cdk/destinations/destination.py +1 -1
- airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +12 -4
- airbyte_cdk/entrypoint.py +7 -6
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/sources/abstract_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -1
- airbyte_cdk/sources/connector_state_manager.py +9 -4
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +6 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +76 -28
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +10 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -17
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +4 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +3 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +270 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +8 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
- airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +71 -17
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +13 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +8 -6
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/spec/spec.py +1 -1
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +0 -1
- airbyte_cdk/sources/embedded/base_integration.py +3 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +18 -7
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +14 -11
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +11 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +3 -3
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +5 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +10 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- airbyte_cdk/sources/streams/core.py +17 -14
- airbyte_cdk/sources/streams/http/http.py +19 -19
- airbyte_cdk/sources/streams/http/http_client.py +4 -48
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +62 -33
- airbyte_cdk/sources/utils/record_helper.py +1 -1
- airbyte_cdk/sources/utils/schema_helpers.py +1 -1
- airbyte_cdk/sources/utils/transform.py +34 -15
- airbyte_cdk/test/entrypoint_wrapper.py +11 -6
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/message_utils.py +4 -3
- airbyte_cdk/utils/spec_schema_transformations.py +3 -2
- airbyte_cdk/utils/traced_exception.py +14 -12
- airbyte_cdk-6.8.2.dev1.dist-info/METADATA +111 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/RECORD +72 -71
- airbyte_cdk-6.8.1rc9.dist-info/METADATA +0 -307
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/cli/source_declarative_manifest/_run.py
CHANGED
@@ -25,7 +25,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, cast
 
-
+import orjson
 
 from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
 from airbyte_cdk.models import (
@@ -72,7 +72,7 @@ class SourceLocalYaml(YamlDeclarativeSource):
         super().__init__(
             catalog=catalog,
             config=config,
-            state=state,
+            state=state,  # type: ignore [arg-type]
             path_to_yaml="manifest.yaml",
         )
 
@@ -152,7 +152,9 @@ def handle_remote_manifest_command(args: list[str]) -> None:
     )
 
 
-def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource:
+def create_declarative_source(
+    args: list[str],
+) -> ConcurrentDeclarativeSource:  # type: ignore [type-arg]
     """Creates the source with the injected config.
 
     This essentially does what other low-code sources do at build time, but at runtime,
@@ -160,10 +162,14 @@ def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource:
     connector builder.
     """
     try:
+        config: Mapping[str, Any] | None
+        catalog: ConfiguredAirbyteCatalog | None
+        state: list[AirbyteStateMessage]
         config, catalog, state = _parse_inputs_into_config_catalog_state(args)
-        if "__injected_declarative_manifest" not in config:
+        if config is None or "__injected_declarative_manifest" not in config:
             raise ValueError(
-
+                "Invalid config: `__injected_declarative_manifest` should be provided at the root "
+                f"of the config but config only has keys: {list(config.keys() if config else [])}"
             )
         return ConcurrentDeclarativeSource(
            config=config,
airbyte_cdk/connector_builder/message_grouper.py
CHANGED
@@ -71,7 +71,7 @@ class MessageGrouper:
 
         is_nested_key = isinstance(field[0], str)
         if is_nested_key:
-            return [field]
+            return [field]
 
         raise ValueError(f"Unknown type for cursor field `{field}")
 
@@ -232,9 +232,9 @@ class MessageGrouper:
                 current_slice_descriptor = self._parse_slice_description(message.log.message)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
                 current_slice_pages = []
                 at_least_one_page_in_group = False
-            elif message.type == MessageType.LOG and message.log.message.startswith(
+            elif message.type == MessageType.LOG and message.log.message.startswith(  # type: ignore[union-attr] # None doesn't have 'message'
                 SliceLogger.SLICE_LOG_PREFIX
-            ):
+            ):
                 # parsing the first slice
                 current_slice_descriptor = self._parse_slice_description(message.log.message)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
             elif message.type == MessageType.LOG:
@@ -274,14 +274,14 @@ class MessageGrouper:
                 if message.trace.type == TraceType.ERROR:  # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has trace.type
                     yield message.trace
             elif message.type == MessageType.RECORD:
-                current_page_records.append(message.record.data)  # type: ignore[union-attr] # AirbyteMessage with MessageType.RECORD has record.data
+                current_page_records.append(message.record.data)  # type: ignore[arg-type, union-attr] # AirbyteMessage with MessageType.RECORD has record.data
                 records_count += 1
                 schema_inferrer.accumulate(message.record)
                 datetime_format_inferrer.accumulate(message.record)
             elif (
                 message.type == MessageType.CONTROL
-                and message.control.type == OrchestratorType.CONNECTOR_CONFIG
-            ):
+                and message.control.type == OrchestratorType.CONNECTOR_CONFIG  # type: ignore[union-attr] # None doesn't have 'type'
+            ):
                 yield message.control
             elif message.type == MessageType.STATE:
                 latest_state_message = message.state  # type: ignore[assignment]
@@ -310,8 +310,8 @@ class MessageGrouper:
             and message.type == MessageType.LOG
             and (
                 MessageGrouper._is_page_http_request(json_message)
-                or message.log.message.startswith("slice:")
-            )
+                or message.log.message.startswith("slice:")  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
+            )
         )
 
     @staticmethod
@@ -355,8 +355,8 @@ class MessageGrouper:
                    StreamReadPages(
                        request=current_page_request,
                        response=current_page_response,
-                        records=deepcopy(current_page_records),
-                    )
+                        records=deepcopy(current_page_records),  # type: ignore [arg-type]
+                    )
                )
                current_page_records.clear()
 
airbyte_cdk/destinations/destination.py
CHANGED
@@ -9,7 +9,7 @@ import sys
 from abc import ABC, abstractmethod
 from typing import Any, Iterable, List, Mapping
 
-
+import orjson
 
 from airbyte_cdk.connector import Connector
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
airbyte_cdk/destinations/vector_db_based/embedder.py
CHANGED
@@ -107,7 +107,7 @@ class BaseOpenAIEmbedder(Embedder):
 class OpenAIEmbedder(BaseOpenAIEmbedder):
     def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int):
         super().__init__(
-            OpenAIEmbeddings(
+            OpenAIEmbeddings(  # type: ignore [call-arg]
                 openai_api_key=config.openai_key, max_retries=15, disallowed_special=()
             ),
             chunk_size,
@@ -118,7 +118,7 @@ class AzureOpenAIEmbedder(BaseOpenAIEmbedder):
     def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int):
         # Azure OpenAI API has — as of 20230927 — a limit of 16 documents per request
         super().__init__(
-            OpenAIEmbeddings(
+            OpenAIEmbeddings(  # type: ignore [call-arg]
                 openai_api_key=config.openai_key,
                 chunk_size=16,
                 max_retries=15,
airbyte_cdk/destinations/vector_db_based/writer.py
CHANGED
@@ -83,11 +83,19 @@ class Writer:
                 yield message
             elif message.type == Type.RECORD:
                 record_chunks, record_id_to_delete = self.processor.process(message.record)
-                self.chunks[
-
-
-
+                self.chunks[
+                    (  # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]"
+                        message.record.namespace,  # type: ignore [union-attr] # record not None
+                        message.record.stream,  # type: ignore [union-attr] # record not None
                     )
+                ].extend(record_chunks)
+                if record_id_to_delete is not None:
+                    self.ids_to_delete[
+                        (  # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]"
+                            message.record.namespace,  # type: ignore [union-attr] # record not None
+                            message.record.stream,  # type: ignore [union-attr] # record not None
+                        )
+                    ].append(record_id_to_delete)
                 self.number_of_chunks += len(record_chunks)
                 if self.number_of_chunks >= self.batch_size:
                     self._process_batch()
airbyte_cdk/entrypoint.py
CHANGED
@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
 from airbyte_cdk.connector import TConfig
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
 from airbyte_cdk.logger import init_logger
-from airbyte_cdk.models import (
+from airbyte_cdk.models import (
     AirbyteConnectionStatus,
     AirbyteMessage,
     AirbyteMessageSerializer,
@@ -255,9 +255,10 @@ class AirbyteEntrypoint(object):
 
                 stream_message_count[
                     HashableStreamDescriptor(
-                        name=message.record.stream,
+                        name=message.record.stream,  # type: ignore[union-attr] # record has `stream`
+                        namespace=message.record.namespace,  # type: ignore[union-attr] # record has `namespace`
                     )
-                ] += 1.0
+                ] += 1.0
             case Type.STATE:
                 if message.state is None:
                     raise ValueError("State message must have a state attribute")
@@ -266,9 +267,9 @@ class AirbyteEntrypoint(object):
 
                 # Set record count from the counter onto the state message
                 message.state.sourceStats = message.state.sourceStats or AirbyteStateStats()  # type: ignore[union-attr] # state has `sourceStats`
-                message.state.sourceStats.recordCount = stream_message_count.get(
+                message.state.sourceStats.recordCount = stream_message_count.get(  # type: ignore[union-attr] # state has `sourceStats`
                     stream_descriptor, 0.0
-                )
+                )
 
                 # Reset the counter
                 stream_message_count[stream_descriptor] = 0.0
@@ -290,7 +291,7 @@ class AirbyteEntrypoint(object):
 
     @staticmethod
     def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
-        return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode()
+        return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode()
 
     @classmethod
     def extract_state(cls, args: List[str]) -> Optional[Any]:
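
Several hunks in this diff (here and in logger.py below) rely on `orjson.dumps` returning `bytes`, hence the trailing `.decode()`. A quick standalone illustration with a made-up payload:

    import orjson

    payload = {"type": "LOG", "log": {"level": "INFO", "message": "hello"}}
    serialized = orjson.dumps(payload)  # bytes, compact encoding
    assert serialized.decode() == '{"type":"LOG","log":{"level":"INFO","message":"hello"}}'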
airbyte_cdk/logger.py
CHANGED
@@ -7,7 +7,7 @@ import logging
 import logging.config
 from typing import Any, Callable, Mapping, Optional, Tuple
 
-
+import orjson
 
 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -78,7 +78,7 @@ class AirbyteLogFormatter(logging.Formatter):
         log_message = AirbyteMessage(
             type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message)
         )
-        return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()
+        return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()
 
     @staticmethod
     def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, Any]:
airbyte_cdk/sources/abstract_source.py
CHANGED
@@ -200,7 +200,7 @@ class AbstractSource(Source, ABC):
         if len(stream_name_to_exception) > 0:
             error_message = generate_failed_streams_error_message(
                 {key: [value] for key, value in stream_name_to_exception.items()}
-            )
+            )
             logger.info(error_message)
             # We still raise at least one exception when a stream raises an exception because the platform currently relies
             # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
airbyte_cdk/sources/config.py
CHANGED
@@ -24,4 +24,4 @@ class BaseConfig(BaseModel):
         rename_key(schema, old_key="anyOf", new_key="oneOf")  # UI supports only oneOf
         expand_refs(schema)
         schema.pop("description", None)  # description added from the docstring
-        return schema
+        return schema
airbyte_cdk/sources/connector_state_manager.py
CHANGED
@@ -4,7 +4,7 @@
 
 import copy
 from dataclasses import dataclass
-from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
+from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union, cast
 
 from airbyte_cdk.models import (
     AirbyteMessage,
@@ -15,6 +15,7 @@ from airbyte_cdk.models import (
     StreamDescriptor,
 )
 from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.models.airbyte_protocol import AirbyteGlobalState, AirbyteStateBlob
 
 
 @dataclass(frozen=True)
@@ -118,8 +119,12 @@ class ConnectorStateManager:
         is_global = cls._is_global_state(state)
 
         if is_global:
-
-
+            # We already validate that this is a global state message, not None:
+            global_state = cast(AirbyteGlobalState, state[0].global_)
+            # global_state has shared_state, also not None:
+            shared_state: AirbyteStateBlob = cast(
+                AirbyteStateBlob, copy.deepcopy(global_state.shared_state, {})
+            )
             streams = {
                 HashableStreamDescriptor(
                     name=per_stream_state.stream_descriptor.name,
@@ -131,7 +136,7 @@ class ConnectorStateManager:
         else:
             streams = {
                 HashableStreamDescriptor(
-                    name=per_stream_state.stream.stream_descriptor.name,
+                    name=per_stream_state.stream.stream_descriptor.name,  # type: ignore[union-attr] # stream has stream_descriptor
                     namespace=per_stream_state.stream.stream_descriptor.namespace,  # type: ignore[union-attr] # stream has stream_descriptor
                 ): per_stream_state.stream.stream_state  # type: ignore[union-attr] # stream has stream_state
                 for per_stream_state in state
airbyte_cdk/sources/declarative/auth/oauth.py
CHANGED
@@ -135,7 +135,7 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
         return self.grant_type.eval(self.config)  # type: ignore # eval returns a string in this context
 
     def get_refresh_request_body(self) -> Mapping[str, Any]:
-        return self._refresh_request_body.eval(self.config)
+        return self._refresh_request_body.eval(self.config)
 
     def get_token_expiry_date(self) -> pendulum.DateTime:
         return self._token_expiry_date  # type: ignore # _token_expiry_date is a pendulum.DateTime. It is never None despite what mypy thinks
airbyte_cdk/sources/declarative/auth/selective_authenticator.py
CHANGED
@@ -28,7 +28,12 @@ class SelectiveAuthenticator(DeclarativeAuthenticator):
         **kwargs: Any,
     ) -> DeclarativeAuthenticator:
         try:
-            selected_key = str(dpath.get(config, authenticator_selection_path))
+            selected_key = str(
+                dpath.get(
+                    config,  # type: ignore [arg-type] # Dpath wants mutable mapping but doesn't need it.
+                    authenticator_selection_path,
+                )
+            )
         except KeyError as err:
             raise ValueError(
                 "The path from `authenticator_selection_path` is not found in the config."
airbyte_cdk/sources/declarative/concurrent_declarative_source.py
CHANGED
@@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
+from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
+    PerPartitionWithGlobalCursor,
+)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -56,9 +59,8 @@ from airbyte_cdk.sources.types import Config, StreamState
 
 
 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
-    # By default, we defer to a value of
-
-    SINGLE_THREADED_CONCURRENCY_LEVEL = 2
+    # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
+    SINGLE_THREADED_CONCURRENCY_LEVEL = 1
 
     def __init__(
         self,
@@ -79,9 +81,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             emit_connector_builder_messages=emit_connector_builder_messages,
             disable_resumable_full_refresh=True,
         )
-        self._config = config
-        self._concurrent_streams: Optional[List[AbstractStream]] = None
-        self._synchronous_streams: Optional[List[Stream]] = None
 
         super().__init__(
             source_config=source_config,
@@ -92,6 +91,21 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
 
         self._state = state
 
+        self._concurrent_streams: Optional[List[AbstractStream]]
+        self._synchronous_streams: Optional[List[Stream]]
+
+        # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
+        # they might depend on it. Ideally we want to have a static method on this class to get the spec without
+        # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
+        # for our future improvements to the CDK.
+        if config:
+            self._concurrent_streams, self._synchronous_streams = self._group_streams(
+                config=config or {}
+            )
+        else:
+            self._concurrent_streams = None
+            self._synchronous_streams = None
+
         concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         if concurrency_level_from_manifest:
             concurrency_level_component = self._constructor.create_component(
@@ -110,29 +124,16 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             )  # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
         else:
             concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
-            initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
+            initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
 
         self._concurrent_source = ConcurrentSource.create(
             num_workers=concurrency_level,
             initial_number_of_partitions_to_generate=initial_number_of_partitions_to_generate,
             logger=self.logger,
             slice_logger=self._slice_logger,
-            message_repository=self.message_repository,
+            message_repository=self.message_repository,
         )
 
-    def _actually_group(self) -> None:
-        # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
-        # they might depend on it. Ideally we want to have a static method on this class to get the spec without
-        # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
-        # for our future improvements to the CDK.
-        if self._config:
-            self._concurrent_streams, self._synchronous_streams = self._group_streams(
-                config=self._config or {}
-            )
-        else:
-            self._concurrent_streams = None
-            self._synchronous_streams = None
-
     def read(
         self,
         logger: logging.Logger,
@@ -142,9 +143,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
     ) -> Iterator[AirbyteMessage]:
         # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
         # streams must be saved so that they can be removed from the catalog before starting synchronous streams
-        if self._concurrent_streams is None:
-            self._actually_group()
-
         if self._concurrent_streams:
             concurrent_stream_names = set(
                 [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
@@ -170,9 +168,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         yield from super().read(logger, config, filtered_catalog, state)
 
     def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
-        if self._concurrent_streams is None:
-            self._actually_group()
-
         concurrent_streams = self._concurrent_streams or []
         synchronous_streams = self._synchronous_streams or []
         return AirbyteCatalog(
@@ -201,7 +196,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         state_manager = ConnectorStateManager(state=self._state)  # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
 
         name_to_stream_mapping = {
-            stream["name"]: stream for stream in self.
+            stream["name"]: stream for stream in self.resolved_manifest["streams"]
         }
 
         for declarative_stream in self.streams(config=config):
@@ -314,6 +309,59 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         cursor=final_state_cursor,
                     )
                 )
+            elif (
+                incremental_sync_component_definition
+                and incremental_sync_component_definition.get("type", "")
+                == DatetimeBasedCursorModel.__name__
+                and self._stream_supports_concurrent_partition_processing(
+                    declarative_stream=declarative_stream
+                )
+                and hasattr(declarative_stream.retriever, "stream_slicer")
+                and isinstance(declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor)
+            ):
+                stream_state = state_manager.get_stream_state(
+                    stream_name=declarative_stream.name, namespace=declarative_stream.namespace
+                )
+                partition_router = declarative_stream.retriever.stream_slicer._partition_router
+
+                cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
+                    state_manager=state_manager,
+                    model_type=DatetimeBasedCursorModel,
+                    component_definition=incremental_sync_component_definition,
+                    stream_name=declarative_stream.name,
+                    stream_namespace=declarative_stream.namespace,
+                    config=config or {},
+                    stream_state=stream_state,
+                    partition_router=partition_router,
+                )
+
+
+                partition_generator = StreamSlicerPartitionGenerator(
+                    DeclarativePartitionFactory(
+                        declarative_stream.name,
+                        declarative_stream.get_json_schema(),
+                        self._retriever_factory(
+                            name_to_stream_mapping[declarative_stream.name],
+                            config,
+                            stream_state,
+                        ),
+                        self.message_repository,
+                    ),
+                    cursor,
+                )
+
+                concurrent_streams.append(
+                    DefaultStream(
+                        partition_generator=partition_generator,
+                        name=declarative_stream.name,
+                        json_schema=declarative_stream.get_json_schema(),
+                        availability_strategy=AlwaysAvailableAvailabilityStrategy(),
+                        primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
+                        cursor_field=cursor.cursor_field.cursor_field_key,
+                        logger=self.logger,
+                        cursor=cursor,
+                    )
+                )
             else:
                 synchronous_streams.append(declarative_stream)
         else:
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py
CHANGED
@@ -41,12 +41,12 @@ class MinMaxDatetime:
         self.datetime = InterpolatedString.create(self.datetime, parameters=parameters or {})
         self._parser = DatetimeParser()
         self.min_datetime = (
-            InterpolatedString.create(self.min_datetime, parameters=parameters)
+            InterpolatedString.create(self.min_datetime, parameters=parameters)  # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str"
             if self.min_datetime
             else None
         )  # type: ignore
         self.max_datetime = (
-            InterpolatedString.create(self.max_datetime, parameters=parameters)
+            InterpolatedString.create(self.max_datetime, parameters=parameters)  # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str"
             if self.max_datetime
             else None
         )  # type: ignore
@@ -66,7 +66,13 @@ class MinMaxDatetime:
             datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"
 
         time = self._parser.parse(
-            str(self.datetime.eval(config, **additional_parameters)), datetime_format
+            str(
+                self.datetime.eval(  # type: ignore[union-attr] # str has no attribute "eval"
+                    config,
+                    **additional_parameters,
+                )
+            ),
+            datetime_format,
         )  # type: ignore # datetime is always cast to an interpolated string
 
         if self.min_datetime:
@@ -105,7 +111,7 @@ class MinMaxDatetime:
         if isinstance(interpolated_string_or_min_max_datetime, InterpolatedString) or isinstance(
             interpolated_string_or_min_max_datetime, str
         ):
-            return MinMaxDatetime(
+            return MinMaxDatetime(  # type: ignore [call-arg]
                 datetime=interpolated_string_or_min_max_datetime, parameters=parameters
             )
         else:
airbyte_cdk/sources/declarative/declarative_component_schema.yaml
CHANGED
@@ -2057,7 +2057,7 @@ definitions:
           The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.
           The placeholders are replaced during the processing to provide neccessary values.
         examples:
-          - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
+          - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}
       access_token_headers:
         title: (Optional) DeclarativeOAuth Access Token Headers
         type: object
@@ -2065,9 +2065,10 @@ definitions:
         description: |-
           The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
         examples:
-          - access_token_headers:
-
-
+          - access_token_headers:
+              {
+                "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}",
+              }
       access_token_params:
         title: (Optional) DeclarativeOAuth Access Token Query Params (Json Encoded)
         type: object
@@ -2076,18 +2077,19 @@ definitions:
           The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
           When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.
         examples:
-          - access_token_params:
-
-
-
-
+          - access_token_params:
+              {
+                "{auth_code_key}": "{{auth_code_key}}",
+                "{client_id_key}": "{{client_id_key}}",
+                "{client_secret_key}": "{{client_secret_key}}",
+              }
       extract_output:
         title: DeclarativeOAuth Extract Output
         type: array
         items:
           type: string
         description: |-
-          The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
+          The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
         examples:
           - extract_output: ["access_token", "refresh_token", "other_field"]
       state:
@@ -2099,17 +2101,14 @@ definitions:
           - max
         description: |-
           The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,
-          including length and complexity.
+          including length and complexity.
         properties:
           min:
            type: integer
           max:
            type: integer
        examples:
-          - state: {
-              "min": 7,
-              "max": 128,
-            }
+          - state: { "min": 7, "max": 128 }
       client_id_key:
         title: (Optional) DeclarativeOAuth Client ID Key Override
         type: string
@@ -2135,14 +2134,14 @@ definitions:
         title: (Optional) DeclarativeOAuth State Key Override
         type: string
         description: |-
-          The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
+          The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
         examples:
           - state_key: "my_custom_state_key_key_name"
       auth_code_key:
         title: (Optional) DeclarativeOAuth Auth Code Key Override
         type: string
         description: |-
-          The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
+          The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
         examples:
           - auth_code_key: "my_custom_auth_code_key_name"
       redirect_uri_key:
airbyte_cdk/sources/declarative/decoders/noop_decoder.py
CHANGED
@@ -14,5 +14,8 @@ class NoopDecoder(Decoder):
     def is_stream_response(self) -> bool:
         return False
 
-    def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]:
+    def decode(  # type: ignore[override] # Signature doesn't match base class
+        self,
+        response: requests.Response,
+    ) -> Generator[Mapping[str, Any], None, None]:
         yield from [{}]