airbyte-internal-ops 0.4.2-py3-none-any.whl → 0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
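
To reproduce the file-level comparison locally, here is a minimal sketch using only the Python standard library. It assumes both wheels have already been downloaded (for example with "pip download airbyte-internal-ops==0.4.2 --no-deps", and likewise for 0.5.1); the local filenames below are assumptions and may need adjusting.

import zipfile

# Assumed local paths to the two downloaded wheels; adjust as needed.
OLD_WHEEL = "airbyte_internal_ops-0.4.2-py3-none-any.whl"
NEW_WHEEL = "airbyte_internal_ops-0.5.1-py3-none-any.whl"

with zipfile.ZipFile(OLD_WHEEL) as old, zipfile.ZipFile(NEW_WHEEL) as new:
    old_entries = set(old.namelist())
    new_entries = set(new.namelist())

# Entries present in only one archive correspond to the added/removed files listed below.
print("Removed:")
print("\n".join(sorted(old_entries - new_entries)))
print("Added:")
print("\n".join(sorted(new_entries - old_entries)))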
Files changed (130)
  1. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/METADATA +2 -1
  2. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/RECORD +21 -129
  3. airbyte_ops_mcp/cli/cloud.py +31 -2
  4. airbyte_ops_mcp/cloud_admin/api_client.py +506 -33
  5. airbyte_ops_mcp/cloud_admin/models.py +56 -0
  6. airbyte_ops_mcp/constants.py +58 -0
  7. airbyte_ops_mcp/{_legacy/airbyte_ci/metadata_service/docker_hub.py → docker_hub.py} +16 -10
  8. airbyte_ops_mcp/mcp/cloud_connector_versions.py +491 -10
  9. airbyte_ops_mcp/mcp/prerelease.py +5 -44
  10. airbyte_ops_mcp/mcp/prod_db_queries.py +128 -4
  11. airbyte_ops_mcp/mcp/regression_tests.py +10 -5
  12. airbyte_ops_mcp/{_legacy/airbyte_ci/metadata_service/validators/metadata_validator.py → metadata_validator.py} +18 -12
  13. airbyte_ops_mcp/prod_db_access/queries.py +51 -0
  14. airbyte_ops_mcp/prod_db_access/sql.py +76 -0
  15. airbyte_ops_mcp/regression_tests/ci_output.py +8 -4
  16. airbyte_ops_mcp/regression_tests/connection_fetcher.py +16 -5
  17. airbyte_ops_mcp/regression_tests/http_metrics.py +21 -2
  18. airbyte_ops_mcp/regression_tests/models.py +7 -1
  19. airbyte_ops_mcp/telemetry.py +162 -0
  20. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/.gitignore +0 -1
  21. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/README.md +0 -420
  22. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/__init__.py +0 -2
  23. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/__init__.py +0 -1
  24. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/__init__.py +0 -8
  25. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/base_backend.py +0 -16
  26. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/duckdb_backend.py +0 -87
  27. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/backends/file_backend.py +0 -165
  28. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connection_objects_retrieval.py +0 -377
  29. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/connector_runner.py +0 -247
  30. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/errors.py +0 -7
  31. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/evaluation_modes.py +0 -25
  32. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/hacks.py +0 -23
  33. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/json_schema_helper.py +0 -384
  34. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/mitm_addons.py +0 -37
  35. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/models.py +0 -595
  36. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/proxy.py +0 -207
  37. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/secret_access.py +0 -47
  38. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/segment_tracking.py +0 -45
  39. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/commons/utils.py +0 -214
  40. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/conftest.py.disabled +0 -751
  41. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/consts.py +0 -4
  42. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/poetry.lock +0 -4480
  43. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/pytest.ini +0 -9
  44. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/__init__.py +0 -1
  45. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_check.py +0 -61
  46. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_discover.py +0 -117
  47. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_read.py +0 -627
  48. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/regression_tests/test_spec.py +0 -43
  49. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/report.py +0 -542
  50. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/stash_keys.py +0 -38
  51. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/__init__.py +0 -0
  52. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/private_details.html.j2 +0 -305
  53. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/templates/report.html.j2 +0 -515
  54. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py +0 -187
  55. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/__init__.py +0 -0
  56. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py +0 -61
  57. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py +0 -217
  58. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py +0 -177
  59. airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_spec.py +0 -631
  60. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/README.md +0 -91
  61. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/bin/bundle-schemas.js +0 -48
  62. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/bin/generate-metadata-models.sh +0 -36
  63. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ActorDefinitionResourceRequirements.py +0 -54
  64. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/AirbyteInternal.py +0 -22
  65. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/AllowedHosts.py +0 -18
  66. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorBreakingChanges.py +0 -65
  67. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorBuildOptions.py +0 -15
  68. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorIPCOptions.py +0 -25
  69. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorMetadataDefinitionV0.json +0 -897
  70. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorMetadataDefinitionV0.py +0 -478
  71. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorMetrics.py +0 -24
  72. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorPackageInfo.py +0 -12
  73. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistryDestinationDefinition.py +0 -407
  74. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistryReleases.py +0 -406
  75. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistrySourceDefinition.py +0 -407
  76. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorRegistryV0.py +0 -413
  77. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorReleases.py +0 -98
  78. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ConnectorTestSuiteOptions.py +0 -58
  79. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/GeneratedFields.py +0 -62
  80. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/GitInfo.py +0 -31
  81. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/JobType.py +0 -23
  82. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/NormalizationDestinationDefinitionConfig.py +0 -24
  83. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/RegistryOverrides.py +0 -111
  84. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ReleaseStage.py +0 -15
  85. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/RemoteRegistries.py +0 -23
  86. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/ResourceRequirements.py +0 -18
  87. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/RolloutConfiguration.py +0 -29
  88. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/Secret.py +0 -34
  89. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SecretStore.py +0 -22
  90. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SourceFileInfo.py +0 -16
  91. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SuggestedStreams.py +0 -18
  92. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/SupportLevel.py +0 -15
  93. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/TestConnections.py +0 -14
  94. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/__init__.py +0 -31
  95. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/generated/airbyte-connector-metadata-schema.json +0 -0
  96. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ActorDefinitionResourceRequirements.yaml +0 -30
  97. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/AirbyteInternal.yaml +0 -32
  98. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/AllowedHosts.yaml +0 -13
  99. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorBreakingChanges.yaml +0 -65
  100. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorBuildOptions.yaml +0 -10
  101. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorIPCOptions.yaml +0 -29
  102. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorMetadataDefinitionV0.yaml +0 -172
  103. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorMetrics.yaml +0 -30
  104. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorPackageInfo.yaml +0 -9
  105. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistryDestinationDefinition.yaml +0 -90
  106. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistryReleases.yaml +0 -35
  107. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistrySourceDefinition.yaml +0 -92
  108. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorRegistryV0.yaml +0 -18
  109. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorReleases.yaml +0 -16
  110. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ConnectorTestSuiteOptions.yaml +0 -28
  111. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/GeneratedFields.yaml +0 -16
  112. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/GitInfo.yaml +0 -21
  113. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/JobType.yaml +0 -14
  114. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/NormalizationDestinationDefinitionConfig.yaml +0 -21
  115. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/RegistryOverrides.yaml +0 -38
  116. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ReleaseStage.yaml +0 -11
  117. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/RemoteRegistries.yaml +0 -25
  118. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/ResourceRequirements.yaml +0 -16
  119. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/RolloutConfiguration.yaml +0 -29
  120. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/Secret.yaml +0 -19
  121. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SecretStore.yaml +0 -16
  122. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SourceFileInfo.yaml +0 -17
  123. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SuggestedStreams.yaml +0 -13
  124. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/SupportLevel.yaml +0 -10
  125. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/models/TestConnections.yaml +0 -17
  126. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/package-lock.json +0 -62
  127. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/package.json +0 -12
  128. airbyte_ops_mcp/_legacy/airbyte_ci/metadata_models/transform.py +0 -71
  129. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/WHEEL +0 -0
  130. {airbyte_internal_ops-0.4.2.dist-info → airbyte_internal_ops-0.5.1.dist-info}/entry_points.txt +0 -0
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/utils.py
@@ -1,187 +0,0 @@
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- from __future__ import annotations
-
- import json
- import logging
- from collections.abc import Callable, Iterable
- from pathlib import Path
- from typing import TYPE_CHECKING, Optional, Union
-
- import pytest
- from airbyte_protocol.models import (
-     AirbyteCatalog,
-     AirbyteMessage,
-     ConnectorSpecification,
-     Status,
-     Type,
- ) # type: ignore
- from deepdiff import DeepDiff # type: ignore
- from live_tests import stash_keys
- from live_tests.commons.models import ExecutionResult
- from live_tests.consts import MAX_LINES_IN_REPORT
-
- if TYPE_CHECKING:
-     from _pytest.fixtures import SubRequest
-
- MAX_DIFF_SIZE_FOR_LOGGING = 500
-
-
- def get_test_logger(request: SubRequest) -> logging.Logger:
-     return logging.getLogger(request.node.name)
-
-
- def filter_records(messages: Iterable[AirbyteMessage]) -> Iterable[AirbyteMessage]:
-     for message in messages:
-         if message.type is Type.RECORD:
-             yield message
-
-
- def write_string_to_test_artifact(
-     request: SubRequest, content: str, filename: str, subdir: Optional[Path] = None
- ) -> Path:
-     # StashKey (in this case TEST_ARTIFACT_DIRECTORY) defines the output class of this,
-     # so this is already a Path.
-     test_artifact_directory = request.config.stash[stash_keys.TEST_ARTIFACT_DIRECTORY]
-     if subdir:
-         test_artifact_directory = test_artifact_directory / subdir
-     test_artifact_directory.mkdir(parents=True, exist_ok=True)
-     artifact_path = test_artifact_directory / filename
-     artifact_path.write_text(content)
-     return artifact_path
-
-
- def get_and_write_diff(
-     request: SubRequest,
-     control_data: Union[list, dict],
-     target_data: Union[list, dict],
-     filepath: str,
-     ignore_order: bool,
-     exclude_paths: Optional[list[str]],
- ) -> str:
-     logger = get_test_logger(request)
-     diff = DeepDiff(
-         control_data,
-         target_data,
-         ignore_order=ignore_order,
-         report_repetition=True,
-         exclude_regex_paths=exclude_paths,
-     )
-     if diff:
-         diff_json = diff.to_json()
-         parsed_diff = json.loads(diff_json)
-         formatted_diff_json = json.dumps(parsed_diff, indent=2)
-
-         diff_path_tree = write_string_to_test_artifact(
-             request, str(diff.tree), f"{filepath}_tree.txt", subdir=request.node.name
-         )
-         diff_path_text = write_string_to_test_artifact(
-             request,
-             formatted_diff_json,
-             f"{filepath}_text.txt",
-             subdir=Path(request.node.name),
-         )
-         diff_path_pretty = write_string_to_test_artifact(
-             request,
-             str(diff.pretty()),
-             f"{filepath}_pretty.txt",
-             subdir=Path(request.node.name),
-         )
-
-         logger.info(
-             f"Diff file are stored in {diff_path_tree}, {diff_path_text}, and {diff_path_pretty}."
-         )
-         if len(diff_json.encode("utf-8")) < MAX_DIFF_SIZE_FOR_LOGGING:
-             logger.error(formatted_diff_json)
-
-         return formatted_diff_json
-     return ""
-
-
- def fail_test_on_failing_execution_results(
-     record_property: Callable, execution_results: list[ExecutionResult]
- ) -> None:
-     error_messages = []
-     for execution_result in execution_results:
-         if not execution_result.success:
-             property_suffix = f"of failing execution {execution_result.command.value} on {execution_result.connector_under_test.name}:{execution_result.connector_under_test.version} [{MAX_LINES_IN_REPORT} last lines]"
-             record_property(
-                 f"Stdout {property_suffix}",
-                 tail_file(execution_result.stdout_file_path, n=MAX_LINES_IN_REPORT),
-             )
-             record_property(
-                 f"Stderr of {property_suffix}",
-                 tail_file(execution_result.stderr_file_path, n=MAX_LINES_IN_REPORT),
-             )
-             error_messages.append(
-                 f"Failed executing command {execution_result.command} on {execution_result.connector_under_test.name}:{execution_result.connector_under_test.version}"
-             )
-     if error_messages:
-         pytest.fail("\n".join(error_messages))
-
-
- def tail_file(file_path: Path, n: int = MAX_LINES_IN_REPORT) -> list[str]:
-     with open(file_path) as f:
-         # Move the cursor to the end of the file
-         f.seek(0, 2)
-         file_size = f.tell()
-         lines: list[str] = []
-         read_size = min(4096, file_size)
-         cursor = file_size - read_size
-
-         # Read chunks of the file until we've found n lines
-         while len(lines) < n and cursor >= 0:
-             f.seek(cursor)
-             chunk = f.read(read_size)
-             lines.extend(chunk.splitlines(True)[-n:])
-             cursor -= read_size
-
-         # Return the last n lines
-         return lines[-n:]
-
-
- def is_successful_check(execution_result: ExecutionResult) -> bool:
-     for message in execution_result.airbyte_messages:
-         if (
-             message.type is Type.CONNECTION_STATUS
-             and message.connectionStatus
-             and message.connectionStatus.status is Status.SUCCEEDED
-         ):
-             return True
-     return False
-
-
- def get_catalog(execution_result: ExecutionResult) -> AirbyteCatalog:
-     catalog = [
-         m.catalog
-         for m in execution_result.airbyte_messages
-         if m.type is Type.CATALOG and m.catalog
-     ]
-     try:
-         return catalog[0]
-     except ValueError:
-         raise ValueError(
-             f"Expected exactly one catalog in the execution result, but got {len(catalog)}."
-         )
-
-
- def get_spec(execution_result: ExecutionResult) -> ConnectorSpecification:
-     spec = [m.spec for m in execution_result.airbyte_messages if m.type is Type.SPEC]
-     try:
-         return spec[0]
-     except ValueError:
-         raise ValueError(
-             f"Expected exactly one spec in the execution result, but got {len(spec)}."
-         )
-
-
- def find_all_values_for_key_in_schema(schema: dict, searched_key: str):
-     """Retrieve all (nested) values in a schema for a specific searched key"""
-     if isinstance(schema, list):
-         for schema_item in schema:
-             yield from find_all_values_for_key_in_schema(schema_item, searched_key)
-     if isinstance(schema, dict):
-         for key, value in schema.items():
-             if key == searched_key:
-                 yield value
-             if isinstance(value, dict) or isinstance(value, list):
-                 yield from find_all_values_for_key_in_schema(value, searched_key)
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_check.py
@@ -1,61 +0,0 @@
- #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- #
- from __future__ import annotations
-
- from typing import Callable
-
- import pytest
- from airbyte_protocol.models import Type
- from live_tests.commons.models import ExecutionResult
- from live_tests.consts import MAX_LINES_IN_REPORT
- from live_tests.utils import (
-     fail_test_on_failing_execution_results,
-     is_successful_check,
-     tail_file,
- )
-
- pytestmark = [
-     pytest.mark.anyio,
- ]
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_check_succeeds(
-     record_property: Callable,
-     check_target_execution_result: ExecutionResult,
- ) -> None:
-     """
-     Verify that the check command succeeds on the target connection.
-
-     Success is determined by the presence of a connection status message with a status of SUCCEEDED.
-     """
-     fail_test_on_failing_execution_results(
-         record_property,
-         [check_target_execution_result],
-     )
-     assert (
-         len(
-             [
-                 msg
-                 for msg in check_target_execution_result.airbyte_messages
-                 if msg.type == Type.CONNECTION_STATUS
-             ]
-         )
-         == 1
-     )
-
-     successful_target_check: bool = is_successful_check(check_target_execution_result)
-     error_messages = []
-     if not successful_target_check:
-         record_property(
-             f"Target CHECK standard output [Last {MAX_LINES_IN_REPORT} lines]",
-             tail_file(
-                 check_target_execution_result.stdout_file_path, n=MAX_LINES_IN_REPORT
-             ),
-         )
-         error_messages.append(
-             "The target check did not succeed. Check the test artifacts for more information."
-         )
-     if error_messages:
-         pytest.fail("\n".join(error_messages))
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_discover.py
@@ -1,217 +0,0 @@
- #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- #
- from __future__ import annotations
-
- from typing import Callable, List, Union
-
- import dpath.util
- import jsonschema
- import pytest
- from airbyte_protocol.models import AirbyteCatalog
- from live_tests.commons.models import ExecutionResult
- from live_tests.utils import (
-     fail_test_on_failing_execution_results,
-     find_all_values_for_key_in_schema,
-     get_catalog,
- )
-
- pytestmark = [
-     pytest.mark.anyio,
- ]
-
-
- @pytest.fixture(scope="session")
- def target_discovered_catalog(
-     discover_target_execution_result: ExecutionResult,
- ) -> AirbyteCatalog:
-     return get_catalog(discover_target_execution_result)
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_discover(
-     record_property: Callable,
-     discover_target_execution_result: ExecutionResult,
-     target_discovered_catalog: AirbyteCatalog,
- ):
-     """
-     Verify that the discover command succeeds on the target connection.
-
-     Success is determined by the presence of a catalog with one or more streams, all with unique names.
-     """
-     fail_test_on_failing_execution_results(
-         record_property,
-         [discover_target_execution_result],
-     )
-     duplicated_stream_names = _duplicated_stream_names(
-         target_discovered_catalog.streams
-     )
-
-     assert target_discovered_catalog is not None, "Message should have catalog"
-     assert (
-         hasattr(target_discovered_catalog, "streams")
-         and target_discovered_catalog.streams
-     ), "Catalog should contain streams"
-     assert len(duplicated_stream_names) == 0, (
-         f"Catalog should have uniquely named streams, duplicates are: {duplicated_stream_names}"
-     )
-
-
- def _duplicated_stream_names(streams) -> List[str]:
-     """Counts number of times a stream appears in the catalog"""
-     name_counts = dict()
-     for stream in streams:
-         count = name_counts.get(stream.name, 0)
-         name_counts[stream.name] = count + 1
-     return [k for k, v in name_counts.items() if v > 1]
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_streams_have_valid_json_schemas(
-     target_discovered_catalog: AirbyteCatalog,
- ):
-     """Check if all stream schemas are valid json schemas."""
-     for stream in target_discovered_catalog.streams:
-         jsonschema.Draft7Validator.check_schema(stream.json_schema)
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_defined_cursors_exist_in_schema(
-     target_discovered_catalog: AirbyteCatalog,
- ):
-     """Check if all of the source defined cursor fields exist on stream's json schema."""
-     for stream in target_discovered_catalog.streams:
-         if not stream.default_cursor_field:
-             continue
-         schema = stream.json_schema
-         assert "properties" in schema, (
-             f"Top level item should have an 'object' type for {stream.name} stream schema"
-         )
-         cursor_path = "/properties/".join(stream.default_cursor_field)
-         cursor_field_location = dpath.util.search(schema["properties"], cursor_path)
-         assert cursor_field_location, (
-             f"Some of defined cursor fields {stream.default_cursor_field} are not specified in discover schema "
-             f"properties for {stream.name} stream"
-         )
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_defined_refs_exist_in_schema(target_discovered_catalog: AirbyteCatalog):
-     """Check the presence of unresolved `$ref`s values within each json schema."""
-     schemas_errors = []
-     for stream in target_discovered_catalog.streams:
-         check_result = list(
-             find_all_values_for_key_in_schema(stream.json_schema, "$ref")
-         )
-         if check_result:
-             schemas_errors.append({stream.name: check_result})
-
-     assert not schemas_errors, (
-         f"Found unresolved `$refs` values for selected streams: {tuple(schemas_errors)}."
-     )
-
-
- @pytest.mark.allow_diagnostic_mode
- @pytest.mark.parametrize("keyword", ["allOf", "not"])
- async def test_defined_keyword_exist_in_schema(
-     keyword, target_discovered_catalog: AirbyteCatalog
- ):
-     """Check for the presence of not allowed keywords within each json schema"""
-     schemas_errors = []
-     for stream in target_discovered_catalog.streams:
-         check_result = _find_keyword_schema(stream.json_schema, key=keyword)
-         if check_result:
-             schemas_errors.append(stream.name)
-
-     assert not schemas_errors, (
-         f"Found not allowed `{keyword}` keyword for selected streams: {schemas_errors}."
-     )
-
-
- def _find_keyword_schema(schema: Union[dict, list, str], key: str) -> bool:
-     """Find at least one keyword in a schema, skip object properties"""
-
-     def _find_keyword(schema, key, _skip=False):
-         if isinstance(schema, list):
-             for v in schema:
-                 _find_keyword(v, key)
-         elif isinstance(schema, dict):
-             for k, v in schema.items():
-                 if k == key and not _skip:
-                     raise StopIteration
-                 rec_skip = k == "properties" and schema.get("type") == "object"
-                 _find_keyword(v, key, rec_skip)
-
-     try:
-         _find_keyword(schema, key)
-     except StopIteration:
-         return True
-     return False
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_primary_keys_exist_in_schema(target_discovered_catalog: AirbyteCatalog):
-     """Check that all primary keys are present in catalog."""
-     for stream in target_discovered_catalog.streams:
-         for pk in stream.source_defined_primary_key or []:
-             schema = stream.json_schema
-             pk_path = "/properties/".join(pk)
-             pk_field_location = dpath.util.search(schema["properties"], pk_path)
-             assert pk_field_location, (
-                 f"One of the PKs ({pk}) is not specified in discover schema for {stream.name} stream"
-             )
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_streams_has_sync_modes(target_discovered_catalog: AirbyteCatalog):
-     """Check that the supported_sync_modes is a not empty field in streams of the catalog."""
-     for stream in target_discovered_catalog.streams:
-         assert stream.supported_sync_modes is not None, (
-             f"The stream {stream.name} is missing supported_sync_modes field declaration."
-         )
-         assert len(stream.supported_sync_modes) > 0, (
-             f"supported_sync_modes list on stream {stream.name} should not be empty."
-         )
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_additional_properties_is_true(target_discovered_catalog: AirbyteCatalog):
-     """
-     Check that value of the "additionalProperties" field is always true.
-
-     A stream schema declaring "additionalProperties": false introduces the risk of accidental breaking changes.
-     Specifically, when removing a property from the stream schema, existing connector catalog will no longer be valid.
-     False value introduces the risk of accidental breaking changes.
-
-     Read https://github.com/airbytehq/airbyte/issues/14196 for more details.
-     """
-     for stream in target_discovered_catalog.streams:
-         additional_properties_values = list(
-             find_all_values_for_key_in_schema(
-                 stream.json_schema, "additionalProperties"
-             )
-         )
-         if additional_properties_values:
-             assert all(
-                 [
-                     additional_properties_value is True
-                     for additional_properties_value in additional_properties_values
-                 ]
-             ), (
-                 "When set, additionalProperties field value must be true for backward compatibility."
-             )
-
-
- @pytest.mark.allow_diagnostic_mode
- @pytest.mark.skip(
-     "This a placeholder for a CAT which has too many failures. We need to fix the connectors at scale first."
- )
- async def test_catalog_has_supported_data_types(
-     target_discovered_catalog: AirbyteCatalog,
- ):
-     """
-     Check that all streams have supported data types, format and airbyte_types.
-
-     Supported data types are listed there: https://docs.airbyte.com/understanding-airbyte/supported-data-types/
-     """
-     pass
airbyte_ops_mcp/_legacy/airbyte_ci/connector_live_tests/validation_tests/test_read.py
@@ -1,177 +0,0 @@
- #
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
- #
- from __future__ import annotations
-
- from collections import defaultdict
- from functools import reduce
- from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Tuple
-
- import pytest
- from airbyte_protocol.models import (
-     AirbyteStateMessage,
-     AirbyteStateStats,
-     AirbyteStateType,
-     AirbyteStreamStatus,
-     AirbyteStreamStatusTraceMessage,
-     ConfiguredAirbyteCatalog,
- )
- from live_tests.commons.json_schema_helper import conforms_to_schema
- from live_tests.commons.models import ExecutionResult
- from live_tests.utils import fail_test_on_failing_execution_results, get_test_logger
-
- if TYPE_CHECKING:
-     from _pytest.fixtures import SubRequest
-
- pytestmark = [
-     pytest.mark.anyio,
- ]
-
-
- @pytest.mark.allow_diagnostic_mode
- async def test_read(
-     request: SubRequest,
-     record_property: Callable,
-     read_target_execution_result: ExecutionResult,
- ):
-     """
-     Verify that the read command succeeds on the target connection.
-
-     Also makes assertions about the validity of the read command output:
-     - At least one state message is emitted per stream
-     - Appropriate stream status messages are emitted for each stream
-     - If a primary key exists for the stream, it is present in the records emitted
-     """
-     has_records = False
-     errors = []
-     warnings = []
-     fail_test_on_failing_execution_results(
-         record_property,
-         [read_target_execution_result],
-     )
-     for stream in read_target_execution_result.configured_catalog.streams:
-         records = read_target_execution_result.get_records_per_stream(
-             stream.stream.name
-         )
-         state_messages = read_target_execution_result.get_states_per_stream(
-             stream.stream.name
-         )
-         statuses = read_target_execution_result.get_status_messages_per_stream(
-             stream.stream.name
-         )
-         primary_key = read_target_execution_result.primary_keys_per_stream.get(
-             stream.stream.name
-         )
-
-         for record in records:
-             has_records = True
-             if not conforms_to_schema(
-                 read_target_execution_result.get_obfuscated_types(record.record.data),
-                 stream.schema(),
-             ):
-                 errors.append(
-                     f"A record was encountered that does not conform to the schema. stream={stream.stream.name} record={record}"
-                 )
-             if primary_key:
-                 if _extract_primary_key_value(record.dict(), primary_key) is None:
-                     errors.append(
-                         f"Primary key subkeys {primary_key!r} have null values or not present in {stream.stream.name} stream records."
-                     )
-         if stream.stream.name not in state_messages:
-             errors.append(
-                 f"At least one state message should be emitted per stream, but no state messages were emitted for {stream.stream.name}."
-             )
-         try:
-             _validate_state_messages(
-                 state_messages=state_messages[stream.stream.name],
-                 configured_catalog=read_target_execution_result.configured_catalog,
-             )
-         except AssertionError as exc:
-             warnings.append(
-                 f"Invalid state message for stream {stream.stream.name}. exc={exc} state_messages={state_messages[stream.stream.name]}"
-             )
-         if stream.stream.name not in statuses:
-             warnings.append(
-                 f"No stream statuses were emitted for stream {stream.stream.name}."
-             )
-         if not _validate_stream_statuses(
-             configured_catalog=read_target_execution_result.configured_catalog,
-             statuses=statuses[stream.stream.name],
-         ):
-             errors.append(
-                 f"Invalid statuses for stream {stream.stream.name}. statuses={statuses[stream.stream.name]}"
-             )
-     if not has_records:
-         errors.append("At least one record should be read using provided catalog.")
-
-     if errors:
-         logger = get_test_logger(request)
-         for error in errors:
-             logger.info(error)
-
-
- def _extract_primary_key_value(
-     record: Mapping[str, Any], primary_key: List[List[str]]
- ) -> dict[Tuple[str], Any]:
-     pk_values = {}
-     for pk_path in primary_key:
-         pk_value: Any = reduce(
-             lambda data, key: data.get(key) if isinstance(data, dict) else None,
-             pk_path,
-             record,
-         )
-         pk_values[tuple(pk_path)] = pk_value
-     return pk_values
-
-
- def _validate_stream_statuses(
-     configured_catalog: ConfiguredAirbyteCatalog,
-     statuses: List[AirbyteStreamStatusTraceMessage],
- ):
-     """Validate all statuses for all streams in the catalogs were emitted in correct order:
-     1. STARTED
-     2. RUNNING (can be >1)
-     3. COMPLETE
-     """
-     stream_statuses = defaultdict(list)
-     for status in statuses:
-         stream_statuses[
-             f"{status.stream_descriptor.namespace}-{status.stream_descriptor.name}"
-         ].append(status.status)
-
-     assert set(
-         f"{x.stream.namespace}-{x.stream.name}" for x in configured_catalog.streams
-     ) == set(stream_statuses), "All stream must emit status"
-
-     for stream_name, status_list in stream_statuses.items():
-         assert len(status_list) >= 3, (
-             f"Stream `{stream_name}` statuses should be emitted in the next order: `STARTED`, `RUNNING`,... `COMPLETE`"
-         )
-         assert status_list[0] == AirbyteStreamStatus.STARTED
-         assert status_list[-1] == AirbyteStreamStatus.COMPLETE
-         assert all(x == AirbyteStreamStatus.RUNNING for x in status_list[1:-1])
-
-
- def _validate_state_messages(
-     state_messages: List[AirbyteStateMessage],
-     configured_catalog: ConfiguredAirbyteCatalog,
- ):
-     # Ensure that at least one state message is emitted for each stream
-     assert len(state_messages) >= len(configured_catalog.streams), (
-         "At least one state message should be emitted for each configured stream."
-     )
-
-     for state_message in state_messages:
-         stream_name = state_message.stream.stream_descriptor.name
-         state_type = state_message.type
-
-         # Ensure legacy state type is not emitted anymore
-         assert state_type != AirbyteStateType.LEGACY, (
-             f"Ensure that statuses from the {stream_name} stream are emitted using either "
-             "`STREAM` or `GLOBAL` state types, as the `LEGACY` state type is now deprecated."
-         )
-
-         # Check if stats are of the correct type and present in state message
-         assert isinstance(state_message.sourceStats, AirbyteStateStats), (
-             "Source stats should be in state message."
-         )