airbyte-cdk 6.60.0.post35.dev16509779638__py3-none-any.whl → 6.60.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +35 -30
- airbyte_cdk/config_observation.py +2 -2
- airbyte_cdk/connector.py +2 -1
- airbyte_cdk/connector_builder/connector_builder_handler.py +6 -1
- airbyte_cdk/connector_builder/main.py +11 -18
- airbyte_cdk/connector_builder/test_reader/helpers.py +31 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +5 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +8 -3
- airbyte_cdk/destinations/destination.py +7 -7
- airbyte_cdk/entrypoint.py +23 -8
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/models/__init__.py +6 -7
- airbyte_cdk/models/airbyte_protocol.py +81 -2
- airbyte_cdk/models/airbyte_protocol_serializers.py +26 -152
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -7
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +7 -4
- airbyte_cdk/sources/declarative/spec/spec.py +2 -2
- airbyte_cdk/sources/file_based/file_based_source.py +3 -3
- airbyte_cdk/sources/source.py +4 -2
- airbyte_cdk/sources/streams/http/http_client.py +7 -5
- airbyte_cdk/sources/streams/permissions/identities_stream.py +1 -1
- airbyte_cdk/sql/shared/sql_processor.py +1 -1
- airbyte_cdk/test/catalog_builder.py +2 -1
- airbyte_cdk/test/entrypoint_wrapper.py +16 -25
- airbyte_cdk/utils/datetime_helpers.py +5 -14
- airbyte_cdk/utils/traced_exception.py +2 -2
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/METADATA +11 -10
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/RECORD +34 -34
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.60.0.post35.dev16509779638.dist-info → airbyte_cdk-6.60.1.dist-info}/entry_points.txt +0 -0
@@ -32,13 +32,13 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
|
|
32
32
|
from airbyte_cdk.models import (
|
33
33
|
AirbyteErrorTraceMessage,
|
34
34
|
AirbyteMessage,
|
35
|
+
AirbyteMessageSerializer,
|
35
36
|
AirbyteStateMessage,
|
36
37
|
AirbyteTraceMessage,
|
37
38
|
ConfiguredAirbyteCatalog,
|
39
|
+
ConnectorSpecificationSerializer,
|
38
40
|
TraceType,
|
39
41
|
Type,
|
40
|
-
ab_connector_spec_from_string,
|
41
|
-
ab_message_to_string,
|
42
42
|
)
|
43
43
|
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
44
44
|
ConcurrentDeclarativeSource,
|
@@ -105,19 +105,21 @@ def _get_local_yaml_source(args: list[str]) -> SourceLocalYaml:
|
|
105
105
|
)
|
106
106
|
except Exception as error:
|
107
107
|
print(
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
108
|
+
orjson.dumps(
|
109
|
+
AirbyteMessageSerializer.dump(
|
110
|
+
AirbyteMessage(
|
111
|
+
type=Type.TRACE,
|
112
|
+
trace=AirbyteTraceMessage(
|
113
|
+
type=TraceType.ERROR,
|
114
|
+
emitted_at=ab_datetime_now().to_epoch_millis(),
|
115
|
+
error=AirbyteErrorTraceMessage(
|
116
|
+
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
|
117
|
+
stack_trace=traceback.format_exc(),
|
118
|
+
),
|
117
119
|
),
|
118
|
-
)
|
119
|
-
)
|
120
|
-
)
|
120
|
+
)
|
121
|
+
)
|
122
|
+
).decode()
|
121
123
|
)
|
122
124
|
raise error
|
123
125
|
|
@@ -147,10 +149,11 @@ def handle_remote_manifest_command(args: list[str]) -> None:
|
|
147
149
|
"Could not find `spec.json` file for source-declarative-manifest"
|
148
150
|
)
|
149
151
|
|
150
|
-
|
152
|
+
spec_obj = json.loads(json_spec)
|
153
|
+
spec = ConnectorSpecificationSerializer.load(spec_obj)
|
151
154
|
|
152
155
|
message = AirbyteMessage(type=Type.SPEC, spec=spec)
|
153
|
-
print(
|
156
|
+
print(AirbyteEntrypoint.airbyte_message_to_string(message))
|
154
157
|
else:
|
155
158
|
source = create_declarative_source(args)
|
156
159
|
launch(
|
@@ -212,19 +215,21 @@ def create_declarative_source(
|
|
212
215
|
)
|
213
216
|
except Exception as error:
|
214
217
|
print(
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
218
|
+
orjson.dumps(
|
219
|
+
AirbyteMessageSerializer.dump(
|
220
|
+
AirbyteMessage(
|
221
|
+
type=Type.TRACE,
|
222
|
+
trace=AirbyteTraceMessage(
|
223
|
+
type=TraceType.ERROR,
|
224
|
+
emitted_at=ab_datetime_now().to_epoch_millis(),
|
225
|
+
error=AirbyteErrorTraceMessage(
|
226
|
+
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
|
227
|
+
stack_trace=traceback.format_exc(),
|
228
|
+
),
|
224
229
|
),
|
225
|
-
)
|
226
|
-
)
|
227
|
-
)
|
230
|
+
)
|
231
|
+
)
|
232
|
+
).decode()
|
228
233
|
)
|
229
234
|
raise error
|
230
235
|
|
@@ -293,10 +298,10 @@ def _register_components_from_file(filepath: str) -> None:
|
|
293
298
|
spec.loader.exec_module(module)
|
294
299
|
|
295
300
|
|
296
|
-
def run(
|
301
|
+
def run() -> None:
|
297
302
|
"""Run the `source-declarative-manifest` CLI.
|
298
303
|
|
299
304
|
Args are detected from the command line, and the appropriate command is executed.
|
300
305
|
"""
|
301
|
-
args =
|
306
|
+
args: list[str] = sys.argv[1:]
|
302
307
|
handle_command(args)
|
@@ -16,9 +16,9 @@ from airbyte_cdk.models import (
|
|
16
16
|
AirbyteControlConnectorConfigMessage,
|
17
17
|
AirbyteControlMessage,
|
18
18
|
AirbyteMessage,
|
19
|
+
AirbyteMessageSerializer,
|
19
20
|
OrchestratorType,
|
20
21
|
Type,
|
21
|
-
ab_message_to_string,
|
22
22
|
)
|
23
23
|
|
24
24
|
|
@@ -92,7 +92,7 @@ def emit_configuration_as_airbyte_control_message(config: MutableMapping[str, An
|
|
92
92
|
See the airbyte_cdk.sources.message package
|
93
93
|
"""
|
94
94
|
airbyte_message = create_connector_config_control_message(config)
|
95
|
-
print(
|
95
|
+
print(orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode())
|
96
96
|
|
97
97
|
|
98
98
|
def create_connector_config_control_message(config: MutableMapping[str, Any]) -> AirbyteMessage:
|
airbyte_cdk/connector.py
CHANGED
@@ -15,6 +15,7 @@ import yaml
|
|
15
15
|
from airbyte_cdk.models import (
|
16
16
|
AirbyteConnectionStatus,
|
17
17
|
ConnectorSpecification,
|
18
|
+
ConnectorSpecificationSerializer,
|
18
19
|
)
|
19
20
|
|
20
21
|
|
@@ -94,7 +95,7 @@ class BaseConnector(ABC, Generic[TConfig]):
|
|
94
95
|
else:
|
95
96
|
raise FileNotFoundError("Unable to find spec.yaml or spec.json in the package.")
|
96
97
|
|
97
|
-
return
|
98
|
+
return ConnectorSpecificationSerializer.load(spec_obj)
|
98
99
|
|
99
100
|
@abstractmethod
|
100
101
|
def check(self, logger: logging.Logger, config: TConfig) -> AirbyteConnectionStatus:
|
@@ -108,7 +108,12 @@ def read_stream(
|
|
108
108
|
stream_name = configured_catalog.streams[0].stream.name
|
109
109
|
|
110
110
|
stream_read = test_read_handler.run_test_read(
|
111
|
-
source,
|
111
|
+
source,
|
112
|
+
config,
|
113
|
+
configured_catalog,
|
114
|
+
stream_name,
|
115
|
+
state,
|
116
|
+
limits.max_records,
|
112
117
|
)
|
113
118
|
|
114
119
|
return AirbyteMessage(
|
@@ -20,10 +20,11 @@ from airbyte_cdk.connector_builder.connector_builder_handler import (
|
|
20
20
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
21
21
|
from airbyte_cdk.models import (
|
22
22
|
AirbyteMessage,
|
23
|
+
AirbyteMessageSerializer,
|
23
24
|
AirbyteStateMessage,
|
24
25
|
ConfiguredAirbyteCatalog,
|
26
|
+
ConfiguredAirbyteCatalogSerializer,
|
25
27
|
)
|
26
|
-
from airbyte_cdk.models.airbyte_protocol_serializers import ab_message_to_string
|
27
28
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
28
29
|
from airbyte_cdk.sources.source import Source
|
29
30
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
@@ -52,7 +53,7 @@ def get_config_and_catalog_from_args(
|
|
52
53
|
|
53
54
|
command = config["__command"]
|
54
55
|
if command == "test_read":
|
55
|
-
catalog =
|
56
|
+
catalog = ConfiguredAirbyteCatalogSerializer.load(BaseConnector.read_config(catalog_path))
|
56
57
|
state = Source.read_state(state_path)
|
57
58
|
else:
|
58
59
|
catalog = None
|
@@ -91,27 +92,19 @@ def handle_request(args: List[str]) -> str:
|
|
91
92
|
command, config, catalog, state = get_config_and_catalog_from_args(args)
|
92
93
|
limits = get_limits(config)
|
93
94
|
source = create_source(config, limits)
|
94
|
-
return
|
95
|
-
|
96
|
-
|
95
|
+
return orjson.dumps(
|
96
|
+
AirbyteMessageSerializer.dump(
|
97
|
+
handle_connector_builder_request(source, command, config, catalog, state, limits)
|
98
|
+
)
|
99
|
+
).decode() # type: ignore[no-any-return] # Serializer.dump() always returns AirbyteMessage
|
97
100
|
|
98
|
-
def run(args: list[str] | None) -> None:
|
99
|
-
"""Run the connector builder handler."""
|
100
|
-
if args is None:
|
101
|
-
args = sys.argv[1:]
|
102
101
|
|
102
|
+
if __name__ == "__main__":
|
103
103
|
try:
|
104
|
-
|
105
|
-
print(result)
|
104
|
+
print(handle_request(sys.argv[1:]))
|
106
105
|
except Exception as exc:
|
107
106
|
error = AirbyteTracedException.from_exception(
|
108
107
|
exc, message=f"Error handling request: {str(exc)}"
|
109
108
|
)
|
110
109
|
m = error.as_airbyte_message()
|
111
|
-
print(
|
112
|
-
sys.exit(1)
|
113
|
-
|
114
|
-
|
115
|
-
if __name__ == "__main__":
|
116
|
-
run(sys.argv[1:])
|
117
|
-
sys.exit(1)
|
110
|
+
print(orjson.dumps(AirbyteMessageSerializer.dump(m)).decode())
|
@@ -269,6 +269,37 @@ def should_close_page_for_slice(at_least_one_page_in_group: bool, message: Airby
|
|
269
269
|
return at_least_one_page_in_group and should_process_slice_descriptor(message)
|
270
270
|
|
271
271
|
|
272
|
+
def is_page_http_request_for_different_stream(
|
273
|
+
json_message: Optional[Dict[str, Any]], stream_name: str
|
274
|
+
) -> bool:
|
275
|
+
"""
|
276
|
+
Determines whether a given JSON message represents a page HTTP request for a different stream.
|
277
|
+
|
278
|
+
This function checks if the provided JSON message is a page HTTP request, and if the stream name in the log is
|
279
|
+
different from the provided stream name.
|
280
|
+
|
281
|
+
This is needed because dynamic streams result in extra page HTTP requests for the dynamic streams that we want to ignore
|
282
|
+
when they do not match the stream that is being read.
|
283
|
+
|
284
|
+
Args:
|
285
|
+
json_message (Optional[Dict[str, Any]]): The JSON message to evaluate.
|
286
|
+
stream_name (str): The name of the stream to compare against.
|
287
|
+
|
288
|
+
Returns:
|
289
|
+
bool: True if the JSON message is a page HTTP request for a different stream, False otherwise.
|
290
|
+
"""
|
291
|
+
if not json_message or not is_page_http_request(json_message):
|
292
|
+
return False
|
293
|
+
|
294
|
+
message_stream_name: str | None = (
|
295
|
+
json_message.get("airbyte_cdk", {}).get("stream", {}).get("name", None)
|
296
|
+
)
|
297
|
+
if message_stream_name is None:
|
298
|
+
return False
|
299
|
+
|
300
|
+
return message_stream_name != stream_name
|
301
|
+
|
302
|
+
|
272
303
|
def is_page_http_request(json_message: Optional[Dict[str, Any]]) -> bool:
|
273
304
|
"""
|
274
305
|
Determines whether a given JSON message represents a page HTTP request.
|
@@ -28,6 +28,7 @@ from .helpers import (
|
|
28
28
|
is_async_auxiliary_request,
|
29
29
|
is_config_update_message,
|
30
30
|
is_log_message,
|
31
|
+
is_page_http_request_for_different_stream,
|
31
32
|
is_record_message,
|
32
33
|
is_state_message,
|
33
34
|
is_trace_with_error,
|
@@ -44,6 +45,7 @@ def get_message_groups(
|
|
44
45
|
schema_inferrer: SchemaInferrer,
|
45
46
|
datetime_format_inferrer: DatetimeFormatInferrer,
|
46
47
|
limit: int,
|
48
|
+
stream_name: str,
|
47
49
|
) -> MESSAGE_GROUPS:
|
48
50
|
"""
|
49
51
|
Processes an iterator of AirbyteMessage objects to group and yield messages based on their type and sequence.
|
@@ -96,6 +98,9 @@ def get_message_groups(
|
|
96
98
|
while records_count < limit and (message := next(messages, None)):
|
97
99
|
json_message = airbyte_message_to_json(message)
|
98
100
|
|
101
|
+
if is_page_http_request_for_different_stream(json_message, stream_name):
|
102
|
+
continue
|
103
|
+
|
99
104
|
if should_close_page(at_least_one_page_in_group, message, json_message):
|
100
105
|
current_page_request, current_page_response = handle_current_page(
|
101
106
|
current_page_request,
|
@@ -86,6 +86,7 @@ class TestReader:
|
|
86
86
|
source: DeclarativeSource,
|
87
87
|
config: Mapping[str, Any],
|
88
88
|
configured_catalog: ConfiguredAirbyteCatalog,
|
89
|
+
stream_name: str,
|
89
90
|
state: List[AirbyteStateMessage],
|
90
91
|
record_limit: Optional[int] = None,
|
91
92
|
) -> StreamRead:
|
@@ -112,14 +113,17 @@ class TestReader:
|
|
112
113
|
|
113
114
|
record_limit = self._check_record_limit(record_limit)
|
114
115
|
# The connector builder currently only supports reading from a single stream at a time
|
115
|
-
|
116
|
+
streams = source.streams(config)
|
117
|
+
stream = next((stream for stream in streams if stream.name == stream_name), None)
|
116
118
|
|
117
119
|
# get any deprecation warnings during the component creation
|
118
120
|
deprecation_warnings: List[LogMessage] = source.deprecation_warnings()
|
119
121
|
|
120
122
|
schema_inferrer = SchemaInferrer(
|
121
|
-
self._pk_to_nested_and_composite_field(stream.primary_key),
|
122
|
-
self._cursor_field_to_nested_and_composite_field(stream.cursor_field)
|
123
|
+
self._pk_to_nested_and_composite_field(stream.primary_key) if stream else None,
|
124
|
+
self._cursor_field_to_nested_and_composite_field(stream.cursor_field)
|
125
|
+
if stream
|
126
|
+
else None,
|
123
127
|
)
|
124
128
|
datetime_format_inferrer = DatetimeFormatInferrer()
|
125
129
|
|
@@ -128,6 +132,7 @@ class TestReader:
|
|
128
132
|
schema_inferrer,
|
129
133
|
datetime_format_inferrer,
|
130
134
|
record_limit,
|
135
|
+
stream_name,
|
131
136
|
)
|
132
137
|
|
133
138
|
slices, log_messages, auxiliary_requests, latest_config_update = self._categorise_groups(
|
@@ -15,12 +15,10 @@ from airbyte_cdk.connector import Connector
|
|
15
15
|
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
|
16
16
|
from airbyte_cdk.models import (
|
17
17
|
AirbyteMessage,
|
18
|
+
AirbyteMessageSerializer,
|
18
19
|
ConfiguredAirbyteCatalog,
|
20
|
+
ConfiguredAirbyteCatalogSerializer,
|
19
21
|
Type,
|
20
|
-
ab_configured_catalog_from_string,
|
21
|
-
ab_configured_catalog_to_string,
|
22
|
-
ab_message_from_string,
|
23
|
-
ab_message_to_string,
|
24
22
|
)
|
25
23
|
from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit
|
26
24
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
@@ -48,7 +46,7 @@ class Destination(Connector, ABC):
|
|
48
46
|
"""Reads from stdin, converting to Airbyte messages"""
|
49
47
|
for line in input_stream:
|
50
48
|
try:
|
51
|
-
yield
|
49
|
+
yield AirbyteMessageSerializer.load(orjson.loads(line))
|
52
50
|
except orjson.JSONDecodeError:
|
53
51
|
logger.info(
|
54
52
|
f"ignoring input which can't be deserialized as Airbyte Message: {line}"
|
@@ -60,7 +58,9 @@ class Destination(Connector, ABC):
|
|
60
58
|
configured_catalog_path: str,
|
61
59
|
input_stream: io.TextIOWrapper,
|
62
60
|
) -> Iterable[AirbyteMessage]:
|
63
|
-
catalog =
|
61
|
+
catalog = ConfiguredAirbyteCatalogSerializer.load(
|
62
|
+
orjson.loads(open(configured_catalog_path).read())
|
63
|
+
)
|
64
64
|
input_messages = self._parse_input_stream(input_stream)
|
65
65
|
logger.info("Begin writing to the destination...")
|
66
66
|
yield from self.write(
|
@@ -151,4 +151,4 @@ class Destination(Connector, ABC):
|
|
151
151
|
parsed_args = self.parse_args(args)
|
152
152
|
output_messages = self.run_cmd(parsed_args)
|
153
153
|
for message in output_messages:
|
154
|
-
print(
|
154
|
+
print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
|
airbyte_cdk/entrypoint.py
CHANGED
@@ -26,12 +26,12 @@ from airbyte_cdk.logger import PRINT_BUFFER, init_logger
|
|
26
26
|
from airbyte_cdk.models import (
|
27
27
|
AirbyteConnectionStatus,
|
28
28
|
AirbyteMessage,
|
29
|
+
AirbyteMessageSerializer,
|
29
30
|
AirbyteStateStats,
|
30
31
|
ConnectorSpecification,
|
31
32
|
FailureType,
|
32
33
|
Status,
|
33
34
|
Type,
|
34
|
-
ab_message_to_string,
|
35
35
|
)
|
36
36
|
from airbyte_cdk.sources import Source
|
37
37
|
from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
|
@@ -47,6 +47,7 @@ logger = init_logger("airbyte")
|
|
47
47
|
|
48
48
|
VALID_URL_SCHEMES = ["https"]
|
49
49
|
CLOUD_DEPLOYMENT_MODE = "cloud"
|
50
|
+
_HAS_LOGGED_FOR_SERIALIZATION_ERROR = False
|
50
51
|
|
51
52
|
|
52
53
|
class AirbyteEntrypoint(object):
|
@@ -177,26 +178,26 @@ class AirbyteEntrypoint(object):
|
|
177
178
|
if cmd == "spec":
|
178
179
|
message = AirbyteMessage(type=Type.SPEC, spec=source_spec)
|
179
180
|
yield from [
|
180
|
-
|
181
|
+
self.airbyte_message_to_string(queued_message)
|
181
182
|
for queued_message in self._emit_queued_messages(self.source)
|
182
183
|
]
|
183
|
-
yield
|
184
|
+
yield self.airbyte_message_to_string(message)
|
184
185
|
else:
|
185
186
|
raw_config = self.source.read_config(parsed_args.config)
|
186
187
|
config = self.source.configure(raw_config, temp_dir)
|
187
188
|
|
188
189
|
yield from [
|
189
|
-
|
190
|
+
self.airbyte_message_to_string(queued_message)
|
190
191
|
for queued_message in self._emit_queued_messages(self.source)
|
191
192
|
]
|
192
193
|
if cmd == "check":
|
193
194
|
yield from map(
|
194
|
-
|
195
|
+
AirbyteEntrypoint.airbyte_message_to_string,
|
195
196
|
self.check(source_spec, config),
|
196
197
|
)
|
197
198
|
elif cmd == "discover":
|
198
199
|
yield from map(
|
199
|
-
|
200
|
+
AirbyteEntrypoint.airbyte_message_to_string,
|
200
201
|
self.discover(source_spec, config),
|
201
202
|
)
|
202
203
|
elif cmd == "read":
|
@@ -204,14 +205,14 @@ class AirbyteEntrypoint(object):
|
|
204
205
|
state = self.source.read_state(parsed_args.state)
|
205
206
|
|
206
207
|
yield from map(
|
207
|
-
|
208
|
+
AirbyteEntrypoint.airbyte_message_to_string,
|
208
209
|
self.read(source_spec, config, config_catalog, state),
|
209
210
|
)
|
210
211
|
else:
|
211
212
|
raise Exception("Unexpected command " + cmd)
|
212
213
|
finally:
|
213
214
|
yield from [
|
214
|
-
|
215
|
+
self.airbyte_message_to_string(queued_message)
|
215
216
|
for queued_message in self._emit_queued_messages(self.source)
|
216
217
|
]
|
217
218
|
|
@@ -326,6 +327,20 @@ class AirbyteEntrypoint(object):
|
|
326
327
|
config_secrets = get_secrets(connection_specification, config)
|
327
328
|
update_secrets(config_secrets)
|
328
329
|
|
330
|
+
@staticmethod
|
331
|
+
def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
|
332
|
+
global _HAS_LOGGED_FOR_SERIALIZATION_ERROR
|
333
|
+
serialized_message = AirbyteMessageSerializer.dump(airbyte_message)
|
334
|
+
try:
|
335
|
+
return orjson.dumps(serialized_message).decode()
|
336
|
+
except Exception as exception:
|
337
|
+
if not _HAS_LOGGED_FOR_SERIALIZATION_ERROR:
|
338
|
+
logger.warning(
|
339
|
+
f"There was an error during the serialization of an AirbyteMessage: `{exception}`. This might impact the sync performances."
|
340
|
+
)
|
341
|
+
_HAS_LOGGED_FOR_SERIALIZATION_ERROR = True
|
342
|
+
return json.dumps(serialized_message)
|
343
|
+
|
329
344
|
@classmethod
|
330
345
|
def extract_state(cls, args: List[str]) -> Optional[Any]:
|
331
346
|
parsed_args = cls.parse_args(args)
|
airbyte_cdk/logger.py
CHANGED
@@ -12,9 +12,9 @@ import orjson
|
|
12
12
|
from airbyte_cdk.models import (
|
13
13
|
AirbyteLogMessage,
|
14
14
|
AirbyteMessage,
|
15
|
+
AirbyteMessageSerializer,
|
15
16
|
Level,
|
16
17
|
Type,
|
17
|
-
ab_message_to_string,
|
18
18
|
)
|
19
19
|
from airbyte_cdk.utils import PrintBuffer
|
20
20
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
@@ -81,7 +81,7 @@ class AirbyteLogFormatter(logging.Formatter):
|
|
81
81
|
log_message = AirbyteMessage(
|
82
82
|
type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message)
|
83
83
|
)
|
84
|
-
return
|
84
|
+
return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()
|
85
85
|
|
86
86
|
@staticmethod
|
87
87
|
def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, Any]:
|
airbyte_cdk/models/__init__.py
CHANGED
@@ -50,13 +50,12 @@ from .airbyte_protocol import (
|
|
50
50
|
Type,
|
51
51
|
)
|
52
52
|
from .airbyte_protocol_serializers import (
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
ab_state_message_to_string,
|
53
|
+
AirbyteMessageSerializer,
|
54
|
+
AirbyteStateMessageSerializer,
|
55
|
+
AirbyteStreamStateSerializer,
|
56
|
+
ConfiguredAirbyteCatalogSerializer,
|
57
|
+
ConfiguredAirbyteStreamSerializer,
|
58
|
+
ConnectorSpecificationSerializer,
|
60
59
|
)
|
61
60
|
from .well_known_types import (
|
62
61
|
BinaryData,
|
@@ -2,8 +2,87 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
import sys
|
6
5
|
from dataclasses import InitVar, dataclass
|
7
6
|
from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
|
8
7
|
|
9
|
-
from
|
8
|
+
from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
|
9
|
+
from serpyco_rs.metadata import Alias
|
10
|
+
|
11
|
+
# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
|
12
|
+
|
13
|
+
|
14
|
+
@dataclass
|
15
|
+
class AirbyteStateBlob:
|
16
|
+
"""
|
17
|
+
A dataclass that dynamically sets attributes based on provided keyword arguments and positional arguments.
|
18
|
+
Used to "mimic" pydantic Basemodel with ConfigDict(extra='allow') option.
|
19
|
+
|
20
|
+
The `AirbyteStateBlob` class allows for flexible instantiation by accepting any number of keyword arguments
|
21
|
+
and positional arguments. These are used to dynamically update the instance's attributes. This class is useful
|
22
|
+
in scenarios where the attributes of an object are not known until runtime and need to be set dynamically.
|
23
|
+
|
24
|
+
Attributes:
|
25
|
+
kwargs (InitVar[Mapping[str, Any]]): A dictionary of keyword arguments used to set attributes dynamically.
|
26
|
+
|
27
|
+
Methods:
|
28
|
+
__init__(*args: Any, **kwargs: Any) -> None:
|
29
|
+
Initializes the `AirbyteStateBlob` by setting attributes from the provided arguments.
|
30
|
+
|
31
|
+
__eq__(other: object) -> bool:
|
32
|
+
Checks equality between two `AirbyteStateBlob` instances based on their internal dictionaries.
|
33
|
+
Returns `False` if the other object is not an instance of `AirbyteStateBlob`.
|
34
|
+
"""
|
35
|
+
|
36
|
+
kwargs: InitVar[Mapping[str, Any]]
|
37
|
+
|
38
|
+
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
39
|
+
# Set any attribute passed in through kwargs
|
40
|
+
for arg in args:
|
41
|
+
self.__dict__.update(arg)
|
42
|
+
for key, value in kwargs.items():
|
43
|
+
setattr(self, key, value)
|
44
|
+
|
45
|
+
def __eq__(self, other: object) -> bool:
|
46
|
+
return (
|
47
|
+
False
|
48
|
+
if not isinstance(other, AirbyteStateBlob)
|
49
|
+
else bool(self.__dict__ == other.__dict__)
|
50
|
+
)
|
51
|
+
|
52
|
+
|
53
|
+
# The following dataclasses have been redeclared to include the new version of AirbyteStateBlob
|
54
|
+
@dataclass
|
55
|
+
class AirbyteStreamState:
|
56
|
+
stream_descriptor: StreamDescriptor # type: ignore [name-defined]
|
57
|
+
stream_state: Optional[AirbyteStateBlob] = None
|
58
|
+
|
59
|
+
|
60
|
+
@dataclass
|
61
|
+
class AirbyteGlobalState:
|
62
|
+
stream_states: List[AirbyteStreamState]
|
63
|
+
shared_state: Optional[AirbyteStateBlob] = None
|
64
|
+
|
65
|
+
|
66
|
+
@dataclass
|
67
|
+
class AirbyteStateMessage:
|
68
|
+
type: Optional[AirbyteStateType] = None # type: ignore [name-defined]
|
69
|
+
stream: Optional[AirbyteStreamState] = None
|
70
|
+
global_: Annotated[AirbyteGlobalState | None, Alias("global")] = (
|
71
|
+
None # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization
|
72
|
+
)
|
73
|
+
data: Optional[Dict[str, Any]] = None
|
74
|
+
sourceStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined]
|
75
|
+
destinationStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined]
|
76
|
+
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class AirbyteMessage:
|
80
|
+
type: Type # type: ignore [name-defined]
|
81
|
+
log: Optional[AirbyteLogMessage] = None # type: ignore [name-defined]
|
82
|
+
spec: Optional[ConnectorSpecification] = None # type: ignore [name-defined]
|
83
|
+
connectionStatus: Optional[AirbyteConnectionStatus] = None # type: ignore [name-defined]
|
84
|
+
catalog: Optional[AirbyteCatalog] = None # type: ignore [name-defined]
|
85
|
+
record: Optional[AirbyteRecordMessage] = None # type: ignore [name-defined]
|
86
|
+
state: Optional[AirbyteStateMessage] = None
|
87
|
+
trace: Optional[AirbyteTraceMessage] = None # type: ignore [name-defined]
|
88
|
+
control: Optional[AirbyteControlMessage] = None # type: ignore [name-defined]
|