airbyte-cdk 0.67.0__py3-none-any.whl → 0.67.2__py3-none-any.whl

Files changed (31)
  1. airbyte_cdk/sources/abstract_source.py +30 -69
  2. airbyte_cdk/sources/connector_state_manager.py +12 -26
  3. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +552 -524
  4. airbyte_cdk/sources/file_based/config/csv_format.py +2 -0
  5. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +32 -14
  6. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -19
  7. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -3
  8. airbyte_cdk/sources/streams/__init__.py +2 -2
  9. airbyte_cdk/sources/streams/concurrent/adapters.py +3 -19
  10. airbyte_cdk/sources/streams/concurrent/cursor.py +1 -3
  11. airbyte_cdk/sources/streams/core.py +36 -34
  12. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/METADATA +3 -2
  13. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/RECORD +31 -31
  14. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -1
  15. unit_tests/sources/file_based/config/test_csv_format.py +6 -1
  16. unit_tests/sources/file_based/file_types/test_parquet_parser.py +51 -6
  17. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +139 -199
  18. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +91 -133
  19. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +2 -13
  20. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +2 -2
  21. unit_tests/sources/file_based/test_scenarios.py +2 -2
  22. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +9 -9
  23. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +5 -5
  24. unit_tests/sources/streams/concurrent/test_adapters.py +2 -13
  25. unit_tests/sources/streams/test_stream_read.py +221 -11
  26. unit_tests/sources/test_abstract_source.py +142 -130
  27. unit_tests/sources/test_connector_state_manager.py +3 -124
  28. unit_tests/sources/test_source.py +18 -14
  29. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/WHEEL +0 -0
  31. {airbyte_cdk-0.67.0.dist-info → airbyte_cdk-0.67.2.dist-info}/top_level.txt +0 -0
unit_tests/sources/test_connector_state_manager.py

@@ -284,61 +284,6 @@ def test_get_stream_state(input_state, stream_name, namespace, expected_state):
     assert actual_state == expected_state


-@pytest.mark.parametrize(
-    "input_state, expected_legacy_state, expected_error",
-    [
-        pytest.param(
-            [AirbyteStateMessage(type=AirbyteStateType.LEGACY, data={"actresses": {"id": "seehorn_rhea"}})],
-            {"actresses": {"id": "seehorn_rhea"}},
-            does_not_raise(),
-            id="test_get_legacy_legacy_state_message",
-        ),
-        pytest.param(
-            [
-                AirbyteStateMessage(
-                    type=AirbyteStateType.STREAM,
-                    stream=AirbyteStreamState(
-                        stream_descriptor=StreamDescriptor(name="actresses", namespace="public"),
-                        stream_state=AirbyteStateBlob.parse_obj({"id": "seehorn_rhea"}),
-                    ),
-                )
-            ],
-            {"actresses": {"id": "seehorn_rhea"}},
-            does_not_raise(),
-            id="test_get_legacy_from_stream_state",
-        ),
-        pytest.param(
-            {
-                "actors": {"created_at": "1962-10-22"},
-                "actresses": {"id": "seehorn_rhea"},
-            },
-            {"actors": {"created_at": "1962-10-22"}, "actresses": {"id": "seehorn_rhea"}},
-            does_not_raise(),
-            id="test_get_legacy_from_legacy_state_blob",
-        ),
-        pytest.param(
-            [
-                AirbyteStateMessage(
-                    type=AirbyteStateType.STREAM,
-                    stream=AirbyteStreamState(
-                        stream_descriptor=StreamDescriptor(name="actresses", namespace="public"),
-                        stream_state=None,
-                    ),
-                )
-            ],
-            {"actresses": {}},
-            does_not_raise(),
-            id="test_get_legacy_from_stream_state",
-        ),
-    ],
-)
-def test_get_legacy_state(input_state, expected_legacy_state, expected_error):
-    with expected_error:
-        state_manager = ConnectorStateManager({}, input_state)
-        actual_legacy_state = state_manager._get_legacy_state()
-        assert actual_legacy_state == expected_legacy_state
-
-
 def test_get_state_returns_deep_copy():
     input_state = [
         AirbyteStateMessage(
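The hunk above deletes `test_get_legacy_state` and with it the last coverage of `ConnectorStateManager._get_legacy_state()`, which flattened per-stream state back into a legacy blob; per-stream state is now the only representation. A minimal sketch of the surviving flow, reusing the models the deleted test constructed (the import paths are assumptions based on the file list above):

```python
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    StreamDescriptor,
)
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager

input_state = [
    AirbyteStateMessage(
        type=AirbyteStateType.STREAM,
        stream=AirbyteStreamState(
            stream_descriptor=StreamDescriptor(name="actresses", namespace="public"),
            stream_state=AirbyteStateBlob.parse_obj({"id": "seehorn_rhea"}),
        ),
    )
]

# Same constructor shape the tests use: (stream_instance_map, state).
state_manager = ConnectorStateManager({}, input_state)

# Per-stream lookup is the accessor that remains; there is no legacy-blob view.
assert state_manager.get_stream_state("actresses", "public") == {"id": "seehorn_rhea"}
```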
@@ -422,11 +367,10 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
     assert state_manager.per_stream_states[
         HashableStreamDescriptor(name=update_name, namespace=update_namespace)
     ] == AirbyteStateBlob.parse_obj(update_value)
-    assert state_manager._get_legacy_state() == expected_legacy_state


 @pytest.mark.parametrize(
-    "start_state, update_name, update_namespace, send_per_stream, expected_state_message",
+    "start_state, update_name, update_namespace, expected_state_message",
     [
         pytest.param(
             [
@@ -447,7 +391,6 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
             ],
             "episodes",
             "public",
-            True,
             AirbyteMessage(
                 type=MessageType.STATE,
                 state=AirbyteStateMessage(
@@ -456,7 +399,6 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
                     stream_descriptor=StreamDescriptor(name="episodes", namespace="public"),
                     stream_state=AirbyteStateBlob.parse_obj({"created_at": "2022_05_22"}),
                 ),
-                data={"episodes": {"created_at": "2022_05_22"}, "seasons": {"id": 1}},
             ),
         ),
         id="test_emit_state_message_with_stream_and_legacy",
@@ -473,7 +415,6 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
             ],
             "episodes",
             "public",
-            True,
             AirbyteMessage(
                 type=MessageType.STATE,
                 state=AirbyteStateMessage(
@@ -482,7 +423,6 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
                     stream_descriptor=StreamDescriptor(name="episodes", namespace="public"),
                     stream_state=AirbyteStateBlob(),
                 ),
-                data={"episodes": {}},
             ),
         ),
         id="test_always_emit_message_with_stream_state_blob",
@@ -499,7 +439,6 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
             ],
             "missing",
             "public",
-            True,
             AirbyteMessage(
                 type=MessageType.STATE,
                 state=AirbyteStateMessage(
@@ -507,7 +446,6 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
                 stream=AirbyteStreamState(
                     stream_descriptor=StreamDescriptor(name="missing", namespace="public"), stream_state=AirbyteStateBlob()
                 ),
-                data={"episodes": {"id": 507}},
             ),
         ),
         id="test_emit_state_nonexistent_stream_name",
@@ -524,7 +462,6 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
             ],
             "episodes",
             "nonexistent",
-            True,
             AirbyteMessage(
                 type=MessageType.STATE,
                 state=AirbyteStateMessage(
@@ -532,72 +469,14 @@ def test_update_state_for_stream(start_state, update_name, update_namespace, upd
                 stream=AirbyteStreamState(
                     stream_descriptor=StreamDescriptor(name="episodes", namespace="nonexistent"), stream_state=AirbyteStateBlob()
                 ),
-                data={"episodes": {"id": 507}},
             ),
         ),
         id="test_emit_state_wrong_namespace",
     ),
-        pytest.param(
-            [
-                AirbyteStateMessage(
-                    type=AirbyteStateType.STREAM,
-                    stream=AirbyteStreamState(
-                        stream_descriptor=StreamDescriptor(name="episodes", namespace=None),
-                        stream_state=AirbyteStateBlob.parse_obj({"created_at": "2022_05_22"}),
-                    ),
-                ),
-                AirbyteStateMessage(
-                    type=AirbyteStateType.STREAM,
-                    stream=AirbyteStreamState(
-                        stream_descriptor=StreamDescriptor(name="seasons", namespace=None),
-                        stream_state=AirbyteStateBlob.parse_obj({"id": 1}),
-                    ),
-                ),
-            ],
-            "episodes",
-            "",
-            False,
-            AirbyteMessage(
-                type=MessageType.STATE,
-                state=AirbyteStateMessage(
-                    data={"episodes": {"created_at": "2022_05_22"}, "seasons": {"id": 1}},
-                ),
-            ),
-            id="test_emit_legacy_state_format",
-        ),
     ],
 )
-def test_create_state_message(start_state, update_name, update_namespace, send_per_stream, expected_state_message):
+def test_create_state_message(start_state, update_name, update_namespace, expected_state_message):
     state_manager = ConnectorStateManager({}, start_state)

-    actual_state_message = state_manager.create_state_message(
-        stream_name=update_name, namespace=update_namespace, send_per_stream_state=send_per_stream
-    )
+    actual_state_message = state_manager.create_state_message(stream_name=update_name, namespace=update_namespace)
     assert actual_state_message == expected_state_message
-
-
-def test_do_not_set_stream_descriptor_namespace_when_none():
-    """
-    This is a very specific test to ensure that the None value is not set and emitted back to the platform for namespace.
-    The platform performs validation on the state message sent by the connector and namespace must be a string or not
-    included at all. The None value registers as null by the platform which is not valid input. We can verify that fields
-    on a pydantic model are not defined using exclude_unset parameter.
-    """
-    expected_stream_state_descriptor = {"name": "episodes"}
-
-    state_manager = ConnectorStateManager(
-        {},
-        [
-            AirbyteStateMessage(
-                type=AirbyteStateType.STREAM,
-                stream=AirbyteStreamState(
-                    stream_descriptor=StreamDescriptor(name="episodes"),
-                    stream_state=None,
-                ),
-            ),
-        ],
-    )
-
-    actual_state_message = state_manager.create_state_message(stream_name="episodes", namespace=None, send_per_stream_state=True)
-
-    assert actual_state_message.state.stream.stream_descriptor.dict(exclude_unset=True) == expected_stream_state_descriptor
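Two removals land above: the `send_per_stream_state` flag is gone from `create_state_message` (per-stream is now the only emission format, hence `create_state_message(stream_name=..., namespace=...)`), and `test_do_not_set_stream_descriptor_namespace_when_none` is deleted. Its docstring described behaviour worth keeping in mind: pydantic's `exclude_unset` drops fields that were never assigned, so an omitted namespace is not serialized back to the platform as an invalid `null`. A self-contained sketch of that pydantic v1 behaviour, using a hypothetical stand-in model rather than the real `StreamDescriptor`:

```python
from typing import Optional

from pydantic import BaseModel


class Descriptor(BaseModel):
    """Hypothetical stand-in for StreamDescriptor."""

    name: str
    namespace: Optional[str] = None


# Never assigned -> excluded entirely, which is what the platform accepts.
assert Descriptor(name="episodes").dict(exclude_unset=True) == {"name": "episodes"}

# Explicitly assigned None -> counts as "set", so it would be emitted as null.
assert Descriptor(name="episodes", namespace=None).dict(exclude_unset=True) == {
    "name": "episodes",
    "namespace": None,
}
```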
unit_tests/sources/test_abstract_source.py

@@ -365,8 +365,8 @@ def test_internal_config(abstract_source, catalog):
     # Test with empty config
     logger = logging.getLogger(f"airbyte.{getattr(abstract_source, 'name', '')}")
     records = [r for r in abstract_source.read(logger=logger, config={}, catalog=catalog, state={})]
-    # 3 for http stream, 3 for non http stream and 3 for stream status messages for each stream (2x)
-    assert len(records) == 3 + 3 + 3 + 3
+    # 3 for http stream, 3 for non http stream, 1 for state message for each stream (2x) and 3 for stream status messages for each stream (2x)
+    assert len(records) == 3 + 3 + 1 + 1 + 3 + 3
     assert http_stream.read_records.called
     assert non_http_stream.read_records.called
     # Make sure page_size havent been set
@@ -375,21 +375,21 @@ def test_internal_config(abstract_source, catalog):
     # Test with records limit set to 1
     internal_config = {"some_config": 100, "_limit": 1}
     records = [r for r in abstract_source.read(logger=logger, config=internal_config, catalog=catalog, state={})]
-    # 1 from http stream + 1 from non http stream and 3 for stream status messages for each stream (2x)
-    assert len(records) == 1 + 1 + 3 + 3
+    # 1 from http stream + 1 from non http stream, 1 for state message for each stream (2x) and 3 for stream status messages for each stream (2x)
+    assert len(records) == 1 + 1 + 1 + 1 + 3 + 3
     assert "_limit" not in abstract_source.streams_config
     assert "some_config" in abstract_source.streams_config
     # Test with records limit set to number that exceeds expceted records
     internal_config = {"some_config": 100, "_limit": 20}
     records = [r for r in abstract_source.read(logger=logger, config=internal_config, catalog=catalog, state={})]
-    assert len(records) == 3 + 3 + 3 + 3
+    assert len(records) == 3 + 3 + 1 + 1 + 3 + 3

     # Check if page_size paramter is set to http instance only
     internal_config = {"some_config": 100, "_page_size": 2}
     records = [r for r in abstract_source.read(logger=logger, config=internal_config, catalog=catalog, state={})]
     assert "_page_size" not in abstract_source.streams_config
     assert "some_config" in abstract_source.streams_config
-    assert len(records) == 3 + 3 + 3 + 3
+    assert len(records) == 3 + 3 + 1 + 1 + 3 + 3
     assert http_stream.page_size == 2
     # Make sure page_size havent been set for non http streams
     assert not non_http_stream.page_size
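The updated arithmetic reflects that `AbstractSource.read()` now emits one STATE message per stream alongside the records and the three stream-status TRACE messages (STARTED, RUNNING, COMPLETE). A small counting helper, not part of the CDK, makes the expected mix explicit:

```python
from collections import Counter


def tally_message_types(messages):
    """Count AirbyteMessage types yielded by a read() call."""
    return Counter(message.type.value for message in messages)


# For two mocked streams of three records each, the new expectation
# 3 + 3 + 1 + 1 + 3 + 3 == 14 corresponds to:
#   {"RECORD": 6, "TRACE": 6, "STATE": 2}
```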
@@ -403,6 +403,7 @@ def test_internal_config_limit(mocker, abstract_source, catalog):
     SLICE_DEBUG_LOG_COUNT = 1
     FULL_RECORDS_NUMBER = 3
     TRACE_STATUS_COUNT = 3
+    STATE_COUNT = 1
     streams = abstract_source.streams(None)
     http_stream = streams[0]
     http_stream.read_records.return_value = [{}] * FULL_RECORDS_NUMBER
@@ -410,7 +411,7 @@ def test_internal_config_limit(mocker, abstract_source, catalog):

     catalog.streams[0].sync_mode = SyncMode.full_refresh
     records = [r for r in abstract_source.read(logger=logger_mock, config=internal_config, catalog=catalog, state={})]
-    assert len(records) == STREAM_LIMIT + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT
+    assert len(records) == STREAM_LIMIT + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT + STATE_COUNT
     logger_info_args = [call[0][0] for call in logger_mock.info.call_args_list]
     # Check if log line matches number of limit
     read_log_record = [_l for _l in logger_info_args if _l.startswith("Read")]
@@ -440,6 +441,7 @@ SCHEMA = {"type": "object", "properties": {"value": {"type": "string"}}}
 def test_source_config_no_transform(mocker, abstract_source, catalog):
     SLICE_DEBUG_LOG_COUNT = 1
     TRACE_STATUS_COUNT = 3
+    STATE_COUNT = 1
     logger_mock = mocker.MagicMock()
     logger_mock.level = logging.DEBUG
     streams = abstract_source.streams(None)
@@ -447,7 +449,7 @@ def test_source_config_no_transform(mocker, abstract_source, catalog):
     http_stream.get_json_schema.return_value = non_http_stream.get_json_schema.return_value = SCHEMA
     http_stream.read_records.return_value, non_http_stream.read_records.return_value = [[{"value": 23}] * 5] * 2
     records = [r for r in abstract_source.read(logger=logger_mock, config={}, catalog=catalog, state={})]
-    assert len(records) == 2 * (5 + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT)
+    assert len(records) == 2 * (5 + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT + STATE_COUNT)
     assert [r.record.data for r in records if r.type == Type.RECORD] == [{"value": 23}] * 2 * 5
     assert http_stream.get_json_schema.call_count == 5
     assert non_http_stream.get_json_schema.call_count == 5
@@ -458,6 +460,7 @@ def test_source_config_transform(mocker, abstract_source, catalog):
     logger_mock.level = logging.DEBUG
     SLICE_DEBUG_LOG_COUNT = 2
     TRACE_STATUS_COUNT = 6
+    STATE_COUNT = 2
     streams = abstract_source.streams(None)
     http_stream, non_http_stream = streams
     http_stream.transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization)
@@ -465,7 +468,7 @@ def test_source_config_transform(mocker, abstract_source, catalog):
     http_stream.get_json_schema.return_value = non_http_stream.get_json_schema.return_value = SCHEMA
     http_stream.read_records.return_value, non_http_stream.read_records.return_value = [{"value": 23}], [{"value": 23}]
     records = [r for r in abstract_source.read(logger=logger_mock, config={}, catalog=catalog, state={})]
-    assert len(records) == 2 + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT
+    assert len(records) == 2 + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT + STATE_COUNT
     assert [r.record.data for r in records if r.type == Type.RECORD] == [{"value": "23"}] * 2


@@ -474,13 +477,14 @@ def test_source_config_transform_and_no_transform(mocker, abstract_source, catal
     logger_mock.level = logging.DEBUG
     SLICE_DEBUG_LOG_COUNT = 2
     TRACE_STATUS_COUNT = 6
+    STATE_COUNT = 2
     streams = abstract_source.streams(None)
     http_stream, non_http_stream = streams
     http_stream.transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization)
     http_stream.get_json_schema.return_value = non_http_stream.get_json_schema.return_value = SCHEMA
     http_stream.read_records.return_value, non_http_stream.read_records.return_value = [{"value": 23}], [{"value": 23}]
     records = [r for r in abstract_source.read(logger=logger_mock, config={}, catalog=catalog, state={})]
-    assert len(records) == 2 + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT
+    assert len(records) == 2 + SLICE_DEBUG_LOG_COUNT + TRACE_STATUS_COUNT + STATE_COUNT
     assert [r.record.data for r in records if r.type == Type.RECORD] == [{"value": "23"}, {"value": 23}]


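The transform tests assert that the http stream's records come back with "value" coerced to a string while the untransformed stream's records stay numeric. A minimal sketch of the normalization behind those assertions; `TypeTransformer.transform` mutates the record in place against the JSON schema (the import path is an assumption):

```python
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

SCHEMA = {"type": "object", "properties": {"value": {"type": "string"}}}

transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization)

record = {"value": 23}
transformer.transform(record, SCHEMA)  # normalizes in place
assert record == {"value": "23"}
```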
@@ -526,8 +530,8 @@ def test_read_default_http_availability_strategy_stream_available(catalog, mocke
     source = MockAbstractSource(streams=streams)
     logger = logging.getLogger(f"airbyte.{getattr(abstract_source, 'name', '')}")
     records = [r for r in source.read(logger=logger, config={}, catalog=catalog, state={})]
-    # 3 for http stream, 3 for non http stream and 3 for stream status messages for each stream (2x)
-    assert len(records) == 3 + 3 + 3 + 3
+    # 3 for http stream, 3 for non http stream, 1 for state message for each stream (2x) and 3 for stream status messages for each stream (2x)
+    assert len(records) == 3 + 3 + 1 + 1 + 3 + 3
     assert http_stream.read_records.called
     assert non_http_stream.read_records.called

@@ -584,8 +588,8 @@ def test_read_default_http_availability_strategy_stream_unavailable(catalog, moc
     with caplog.at_level(logging.WARNING):
         records = [r for r in source.read(logger=logger, config={}, catalog=catalog, state={})]

-    # 0 for http stream, 3 for non http stream and 3 status trace messages
-    assert len(records) == 0 + 3 + 3
+    # 0 for http stream, 3 for non http stream, 1 for non http stream state message and 3 status trace messages
+    assert len(records) == 0 + 3 + 1 + 3
     assert non_http_stream.read_records.called
     expected_logs = [
         f"Skipped syncing stream '{http_stream.name}' because it was unavailable.",