airbyte-cdk 0.39.3__py3-none-any.whl → 0.39.4__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/connector_builder/message_grouper.py +17 -19
- airbyte_cdk/connector_builder/models.py +1 -0
- {airbyte_cdk-0.39.3.dist-info → airbyte_cdk-0.39.4.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.39.3.dist-info → airbyte_cdk-0.39.4.dist-info}/RECORD +9 -9
- unit_tests/connector_builder/test_connector_builder_handler.py +4 -1
- unit_tests/connector_builder/test_message_grouper.py +100 -3
- {airbyte_cdk-0.39.3.dist-info → airbyte_cdk-0.39.4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.39.3.dist-info → airbyte_cdk-0.39.4.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.39.3.dist-info → airbyte_cdk-0.39.4.dist-info}/top_level.txt +0 -0
@@ -16,11 +16,13 @@ from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
|
16
16
|
from airbyte_cdk.utils import AirbyteTracedException
|
17
17
|
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
|
18
18
|
from airbyte_protocol.models.airbyte_protocol import (
|
19
|
+
AirbyteControlMessage,
|
19
20
|
AirbyteLogMessage,
|
20
21
|
AirbyteMessage,
|
21
22
|
AirbyteTraceMessage,
|
22
23
|
ConfiguredAirbyteCatalog,
|
23
24
|
Level,
|
25
|
+
OrchestratorType,
|
24
26
|
TraceType,
|
25
27
|
)
|
26
28
|
from airbyte_protocol.models.airbyte_protocol import Type as MessageType
|
@@ -52,6 +54,7 @@ class MessageGrouper:
|
|
52
54
|
|
53
55
|
slices = []
|
54
56
|
log_messages = []
|
57
|
+
latest_config_update: AirbyteControlMessage = None
|
55
58
|
for message_group in self._get_message_groups(
|
56
59
|
self._read_stream(source, config, configured_catalog),
|
57
60
|
schema_inferrer,
|
@@ -63,7 +66,9 @@ class MessageGrouper:
|
|
63
66
|
if message_group.type == TraceType.ERROR:
|
64
67
|
error_message = f"{message_group.error.message} - {message_group.error.stack_trace}"
|
65
68
|
log_messages.append(LogMessage(**{"message": error_message, "level": "ERROR"}))
|
66
|
-
|
69
|
+
elif isinstance(message_group, AirbyteControlMessage):
|
70
|
+
if not latest_config_update or latest_config_update.emitted_at <= message_group.emitted_at:
|
71
|
+
latest_config_update = message_group
|
67
72
|
else:
|
68
73
|
slices.append(message_group)
|
69
74
|
|
@@ -74,11 +79,12 @@ class MessageGrouper:
|
|
74
79
|
inferred_schema=schema_inferrer.get_stream_schema(
|
75
80
|
configured_catalog.streams[0].stream.name
|
76
81
|
), # The connector builder currently only supports reading from a single stream at a time
|
82
|
+
latest_config_update=latest_config_update.connectorConfig.config if latest_config_update else self._clean_config(config),
|
77
83
|
)
|
78
84
|
|
79
85
|
def _get_message_groups(
|
80
86
|
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
|
81
|
-
) -> Iterable[Union[StreamReadPages, AirbyteLogMessage, AirbyteTraceMessage]]:
|
87
|
+
) -> Iterable[Union[StreamReadPages, AirbyteControlMessage, AirbyteLogMessage, AirbyteTraceMessage]]:
|
82
88
|
"""
|
83
89
|
Message groups are partitioned according to when request log messages are received. Subsequent response log messages
|
84
90
|
and record messages belong to the prior request log message and when we encounter another request, append the latest
|
@@ -135,6 +141,8 @@ class MessageGrouper:
|
|
135
141
|
current_page_records.append(message.record.data)
|
136
142
|
records_count += 1
|
137
143
|
schema_inferrer.accumulate(message.record)
|
144
|
+
elif message.type == MessageType.CONTROL and message.control.type == OrchestratorType.CONNECTOR_CONFIG:
|
145
|
+
yield message.control
|
138
146
|
else:
|
139
147
|
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete=not had_error)
|
140
148
|
yield StreamReadSlices(pages=current_slice_pages, slice_descriptor=current_slice_descriptor)
|
@@ -217,20 +225,10 @@ class MessageGrouper:
|
|
217
225
|
def _parse_slice_description(self, log_message):
|
218
226
|
return json.loads(log_message.replace(AbstractSource.SLICE_LOG_PREFIX, "", 1))
|
219
227
|
|
220
|
-
@
|
221
|
-
def
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
"name": stream_name,
|
228
|
-
"json_schema": {},
|
229
|
-
"supported_sync_modes": ["full_refresh", "incremental"],
|
230
|
-
},
|
231
|
-
"sync_mode": "full_refresh",
|
232
|
-
"destination_sync_mode": "overwrite",
|
233
|
-
}
|
234
|
-
]
|
235
|
-
}
|
236
|
-
)
|
228
|
+
@staticmethod
|
229
|
+
def _clean_config(config: Mapping[str, Any]):
|
230
|
+
cleaned_config = deepcopy(config)
|
231
|
+
for key in config.keys():
|
232
|
+
if key.startswith("__"):
|
233
|
+
del cleaned_config[key]
|
234
|
+
return cleaned_config
|
@@ -8,8 +8,8 @@ airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
9
9
|
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=q8mqQjNqpvHZgwVbNuvSe19o4Aw6MQTuhA2URmdz0K0,5443
|
10
10
|
airbyte_cdk/connector_builder/main.py,sha256=jn2gqaYAvd6uDoFe0oVhnY23grm5sL-jfIX6kGvhVxk,2994
|
11
|
-
airbyte_cdk/connector_builder/message_grouper.py,sha256=
|
12
|
-
airbyte_cdk/connector_builder/models.py,sha256=
|
11
|
+
airbyte_cdk/connector_builder/message_grouper.py,sha256=uJGOBhinvbisgAa-bQN3XE2L2xFTeVeykLwDCRYcxgc,12110
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=yW_j91B-3FYNTNbWjR2ZVYTXBHlskT55uxdAqg7FhAE,1221
|
13
13
|
airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
|
14
14
|
airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
|
15
15
|
airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
|
@@ -163,8 +163,8 @@ airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnT
|
|
163
163
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
164
164
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
165
165
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
166
|
-
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
|
167
|
-
unit_tests/connector_builder/test_message_grouper.py,sha256=
|
166
|
+
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=V9p7AFECaLqSK-iGvu0OqwV6qREQC2BhWo0H4OoiiK4,26895
|
167
|
+
unit_tests/connector_builder/test_message_grouper.py,sha256=XMVRW45RDTgy1YVzkV-jOXj7Ar2mzgDV8OW2QDzZjYU,28510
|
168
168
|
unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
|
169
169
|
unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
170
170
|
unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
|
@@ -262,8 +262,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
262
262
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
263
263
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
264
264
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
265
|
-
airbyte_cdk-0.39.
|
266
|
-
airbyte_cdk-0.39.
|
267
|
-
airbyte_cdk-0.39.
|
268
|
-
airbyte_cdk-0.39.
|
269
|
-
airbyte_cdk-0.39.
|
265
|
+
airbyte_cdk-0.39.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
266
|
+
airbyte_cdk-0.39.4.dist-info/METADATA,sha256=pIdFzow6PMTMXmX-gel8XxP1dX6FwtRScbO35opWUzM,8902
|
267
|
+
airbyte_cdk-0.39.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
268
|
+
airbyte_cdk-0.39.4.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
269
|
+
airbyte_cdk-0.39.4.dist-info/RECORD,,
|
@@ -354,6 +354,7 @@ def test_read():
|
|
354
354
|
],
|
355
355
|
test_read_limit_reached=False,
|
356
356
|
inferred_schema=None,
|
357
|
+
latest_config_update={}
|
357
358
|
)
|
358
359
|
|
359
360
|
expected_airbyte_message = AirbyteMessage(
|
@@ -367,6 +368,7 @@ def test_read():
|
|
367
368
|
],
|
368
369
|
"test_read_limit_reached": False,
|
369
370
|
"inferred_schema": None,
|
371
|
+
"latest_config_update": {}
|
370
372
|
},
|
371
373
|
emitted_at=1,
|
372
374
|
),
|
@@ -407,7 +409,8 @@ def test_read_returns_error_response(mock_from_exception):
|
|
407
409
|
pages=[StreamReadPages(records=[], request=None, response=None)],
|
408
410
|
slice_descriptor=None, state=None)],
|
409
411
|
test_read_limit_reached=False,
|
410
|
-
inferred_schema=None
|
412
|
+
inferred_schema=None,
|
413
|
+
latest_config_update={})
|
411
414
|
|
412
415
|
expected_message = AirbyteMessage(
|
413
416
|
type=MessageType.RECORD,
|
@@ -9,7 +9,15 @@ from unittest.mock import MagicMock, patch
|
|
9
9
|
import pytest
|
10
10
|
from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
|
11
11
|
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages
|
12
|
-
from airbyte_cdk.models import
|
12
|
+
from airbyte_cdk.models import (
|
13
|
+
AirbyteControlConnectorConfigMessage,
|
14
|
+
AirbyteControlMessage,
|
15
|
+
AirbyteLogMessage,
|
16
|
+
AirbyteMessage,
|
17
|
+
AirbyteRecordMessage,
|
18
|
+
Level,
|
19
|
+
OrchestratorType,
|
20
|
+
)
|
13
21
|
from airbyte_cdk.models import Type as MessageType
|
14
22
|
from unit_tests.connector_builder.utils import create_configured_catalog
|
15
23
|
|
@@ -463,9 +471,9 @@ def test_get_grouped_messages_with_many_slices(mock_entrypoint_read):
|
|
463
471
|
)
|
464
472
|
)
|
465
473
|
|
466
|
-
|
474
|
+
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
467
475
|
|
468
|
-
stream_read: StreamRead =
|
476
|
+
stream_read: StreamRead = connector_builder_handler.get_message_groups(
|
469
477
|
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
470
478
|
)
|
471
479
|
|
@@ -530,6 +538,76 @@ def test_read_stream_returns_error_if_stream_does_not_exist():
|
|
530
538
|
assert "ERROR" in actual_response.logs[0].level
|
531
539
|
|
532
540
|
|
541
|
+
@patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
|
542
|
+
def test_given_control_message_then_stream_read_has_config_update(mock_entrypoint_read):
|
543
|
+
updated_config = {"x": 1}
|
544
|
+
mock_source = make_mock_source(mock_entrypoint_read, iter(
|
545
|
+
any_request_and_response_with_a_record() + [connector_configuration_control_message(1, updated_config)]
|
546
|
+
))
|
547
|
+
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
548
|
+
stream_read: StreamRead = connector_builder_handler.get_message_groups(
|
549
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
550
|
+
)
|
551
|
+
|
552
|
+
assert stream_read.latest_config_update == updated_config
|
553
|
+
|
554
|
+
|
555
|
+
@patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
|
556
|
+
def test_given_no_control_message_then_use_in_memory_config_change_as_update(mock_entrypoint_read):
|
557
|
+
mock_source = make_mock_source(mock_entrypoint_read, iter(any_request_and_response_with_a_record()))
|
558
|
+
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
559
|
+
full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
|
560
|
+
stream_read: StreamRead = connector_builder_handler.get_message_groups(
|
561
|
+
source=mock_source, config=full_config, configured_catalog=create_configured_catalog("hashiras")
|
562
|
+
)
|
563
|
+
|
564
|
+
assert stream_read.latest_config_update == CONFIG
|
565
|
+
|
566
|
+
|
567
|
+
@patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
|
568
|
+
def test_given_multiple_control_messages_then_stream_read_has_latest_based_on_emitted_at(mock_entrypoint_read):
|
569
|
+
earliest = 0
|
570
|
+
earliest_config = {"earliest": 0}
|
571
|
+
latest = 1
|
572
|
+
latest_config = {"latest": 1}
|
573
|
+
mock_source = make_mock_source(mock_entrypoint_read, iter(
|
574
|
+
any_request_and_response_with_a_record() +
|
575
|
+
[
|
576
|
+
# here, we test that even if messages are emitted in a different order, we still rely on `emitted_at`
|
577
|
+
connector_configuration_control_message(latest, latest_config),
|
578
|
+
connector_configuration_control_message(earliest, earliest_config),
|
579
|
+
]
|
580
|
+
)
|
581
|
+
)
|
582
|
+
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
583
|
+
stream_read: StreamRead = connector_builder_handler.get_message_groups(
|
584
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
585
|
+
)
|
586
|
+
|
587
|
+
assert stream_read.latest_config_update == latest_config
|
588
|
+
|
589
|
+
|
590
|
+
@patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
|
591
|
+
def test_given_multiple_control_messages_with_same_timestamp_then_stream_read_has_latest_based_on_message_order(mock_entrypoint_read):
|
592
|
+
emitted_at = 0
|
593
|
+
earliest_config = {"earliest": 0}
|
594
|
+
latest_config = {"latest": 1}
|
595
|
+
mock_source = make_mock_source(mock_entrypoint_read, iter(
|
596
|
+
any_request_and_response_with_a_record() +
|
597
|
+
[
|
598
|
+
connector_configuration_control_message(emitted_at, earliest_config),
|
599
|
+
connector_configuration_control_message(emitted_at, latest_config),
|
600
|
+
]
|
601
|
+
)
|
602
|
+
)
|
603
|
+
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
604
|
+
stream_read: StreamRead = connector_builder_handler.get_message_groups(
|
605
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
606
|
+
)
|
607
|
+
|
608
|
+
assert stream_read.latest_config_update == latest_config
|
609
|
+
|
610
|
+
|
533
611
|
def make_mock_source(mock_entrypoint_read, return_value: Iterator) -> MagicMock:
|
534
612
|
mock_source = MagicMock()
|
535
613
|
mock_entrypoint_read.return_value = return_value
|
@@ -550,3 +628,22 @@ def record_message(stream: str, data: dict) -> AirbyteMessage:
|
|
550
628
|
|
551
629
|
def slice_message(slice_descriptor: str = '{"key": "value"}') -> AirbyteMessage:
|
552
630
|
return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message="slice:" + slice_descriptor))
|
631
|
+
|
632
|
+
|
633
|
+
def connector_configuration_control_message(emitted_at: float, config: dict) -> AirbyteMessage:
|
634
|
+
return AirbyteMessage(
|
635
|
+
type=MessageType.CONTROL,
|
636
|
+
control=AirbyteControlMessage(
|
637
|
+
type=OrchestratorType.CONNECTOR_CONFIG,
|
638
|
+
emitted_at=emitted_at,
|
639
|
+
connectorConfig=AirbyteControlConnectorConfigMessage(config=config),
|
640
|
+
)
|
641
|
+
)
|
642
|
+
|
643
|
+
|
644
|
+
def any_request_and_response_with_a_record():
|
645
|
+
return [
|
646
|
+
request_log_message({"request": 1}),
|
647
|
+
response_log_message({"response": 2}),
|
648
|
+
record_message("hashiras", {"name": "Shinobu Kocho"}),
|
649
|
+
]
|
File without changes
|
File without changes
|
File without changes
|