PyPI - airbyte-cdk - Versions diffs - 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl - Mend

airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

unit_tests/sources/file_based/scenarios/incremental_scenarios.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+from unit_tests.sources.file_based.helpers import LowHistoryLimitCursor
 from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
 single_csv_input_state_is_earlier_scenario = (
@@ -1004,7 +1005,7 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
         }
     )
     .set_file_type("csv")
-    .set_max_history_size(3)
+    .set_cursor_cls(LowHistoryLimitCursor)
     .set_expected_catalog(
         {
             "streams": [
@@ -1151,7 +1152,7 @@ multi_csv_same_timestamp_more_files_than_history_size_scenario = (
         }
     )
     .set_file_type("csv")
-    .set_max_history_size(3)
+    .set_cursor_cls(LowHistoryLimitCursor)
     .set_expected_catalog(
         {
             "streams": [
@@ -1268,7 +1269,7 @@ multi_csv_sync_recent_files_if_history_is_incomplete_scenario = (
             },
         }
     )
-    .set_max_history_size(3)
+    .set_cursor_cls(LowHistoryLimitCursor)
     .set_file_type("csv")
     .set_expected_catalog(
         {
@@ -1386,7 +1387,7 @@ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_time
         }
     )
     .set_file_type("csv")
-    .set_max_history_size(3)
+    .set_cursor_cls(LowHistoryLimitCursor)
     .set_expected_catalog(
         {
             "streams": [
@@ -1509,7 +1510,7 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
         }
     )
     .set_file_type("csv")
-    .set_max_history_size(3)
+    .set_cursor_cls(LowHistoryLimitCursor)
     .set_expected_catalog(
         {
             "streams": [

unit_tests/sources/file_based/scenarios/scenario_builder.py CHANGED Viewed

@@ -11,10 +11,11 @@ from airbyte_cdk.sources.file_based.availability_strategy.abstract_file_based_av
     AbstractFileBasedAvailabilityStrategy,
 )
 from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy, DefaultDiscoveryPolicy
-from airbyte_cdk.sources.file_based.file_based_source import DEFAULT_MAX_HISTORY_SIZE, default_parsers
+from airbyte_cdk.sources.file_based.file_based_source import default_parsers
 from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
 from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
 from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy
+from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
 from unit_tests.sources.file_based.in_memory_files_source import InMemoryFilesSource
@@ -46,7 +47,7 @@ class TestScenario:
             expected_read_error: Tuple[Optional[Type[Exception]], Optional[str]],
             incremental_scenario_config: Optional[IncrementalScenarioConfig],
             file_write_options: Mapping[str, Any],
-            max_history_size: int,
+            cursor_cls: Optional[Type[AbstractFileBasedCursor]],
     ):
         self.name = name
         self.config = config
@@ -68,7 +69,7 @@ class TestScenario:
             stream_reader,
             self.configured_catalog(SyncMode.incremental if incremental_scenario_config else SyncMode.full_refresh),
             file_write_options,
-            max_history_size,
+            cursor_cls,
         )
         self.incremental_scenario_config = incremental_scenario_config
         self.validate()
@@ -124,7 +125,7 @@ class TestScenarioBuilder:
         self._expected_read_error: Tuple[Optional[Type[Exception]], Optional[str]] = None, None
         self._incremental_scenario_config: Optional[IncrementalScenarioConfig] = None
         self._file_write_options: Mapping[str, Any] = {}
-        self._max_history_size = DEFAULT_MAX_HISTORY_SIZE
+        self._cursor_cls: Optional[Type[AbstractFileBasedCursor]] = None
     def set_name(self, name: str) -> "TestScenarioBuilder":
         self._name = name
@@ -182,8 +183,8 @@ class TestScenarioBuilder:
         self._stream_reader = stream_reader
         return self
-    def set_max_history_size(self, max_history_size: int) -> "TestScenarioBuilder":
-        self._max_history_size = max_history_size
+    def set_cursor_cls(self, cursor_cls: AbstractFileBasedCursor) -> "TestScenarioBuilder":
+        self._cursor_cls = cursor_cls
         return self
     def set_incremental_scenario_config(self, incremental_scenario_config: IncrementalScenarioConfig) -> "TestScenarioBuilder":
@@ -232,5 +233,5 @@ class TestScenarioBuilder:
             self._expected_read_error,
             self._incremental_scenario_config,
             self._file_write_options,
-            self._max_history_size,
+            self._cursor_cls,
         )

unit_tests/sources/file_based/stream/test_default_file_based_cursor.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Any, List, Mapping
 from unittest.mock import MagicMock
 import pytest
+from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
 from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
 from freezegun import freeze_time
@@ -103,7 +104,7 @@ from freezegun import freeze_time
     ],
 )
 def test_add_file(files_to_add: List[RemoteFile], expected_start_time: List[datetime], expected_state_dict: Mapping[str, Any]) -> None:
-    cursor = DefaultFileBasedCursor(3, 3)
+    cursor = get_cursor(max_history_size=3, days_to_sync_if_history_is_full=3)
     assert cursor._compute_start_time() == datetime.min
     for index, f in enumerate(files_to_add):
@@ -160,7 +161,7 @@ def test_add_file(files_to_add: List[RemoteFile], expected_start_time: List[date
 ])
 def test_get_files_to_sync(files: List[RemoteFile], expected_files_to_sync: List[RemoteFile], max_history_size: int, history_is_partial: bool) -> None:
     logger = MagicMock()
-    cursor = DefaultFileBasedCursor(max_history_size, 3)
+    cursor = get_cursor(max_history_size, 3)
     files_to_sync = list(cursor.get_files_to_sync(files, logger))
     for f in files_to_sync:
@@ -173,7 +174,7 @@ def test_get_files_to_sync(files: List[RemoteFile], expected_files_to_sync: List
 @freeze_time("2023-06-16T00:00:00Z")
 def test_only_recent_files_are_synced_if_history_is_full() -> None:
     logger = MagicMock()
-    cursor = DefaultFileBasedCursor(2, 3)
+    cursor = get_cursor(2, 3)
     files_in_history = [
         RemoteFile(uri="b1.csv", last_modified=datetime(2021, 1, 2), file_type="csv"),
@@ -210,7 +211,7 @@ def test_only_recent_files_are_synced_if_history_is_full() -> None:
 ])
 def test_sync_file_already_present_in_history(modified_at_delta: timedelta, should_sync_file: bool) -> None:
     logger = MagicMock()
-    cursor = DefaultFileBasedCursor(2, 3)
+    cursor = get_cursor(2, 3)
     original_modified_at = datetime(2021, 1, 2)
     filename = "a.csv"
     files_in_history = [
@@ -245,7 +246,7 @@ def test_sync_file_already_present_in_history(modified_at_delta: timedelta, shou
 )
 def test_should_sync_file(file_name: str, last_modified: datetime, earliest_dt_in_history: datetime, should_sync_file: bool) -> None:
     logger = MagicMock()
-    cursor = DefaultFileBasedCursor(1, 3)
+    cursor = get_cursor(1, 3)
     cursor.add_file(RemoteFile(uri="b.csv", last_modified=earliest_dt_in_history, file_type="csv"))
     cursor._start_time = cursor._compute_start_time()
@@ -255,13 +256,13 @@ def test_should_sync_file(file_name: str, last_modified: datetime, earliest_dt_i
 def test_set_initial_state_no_history() -> None:
-    cursor = DefaultFileBasedCursor(1, 3)
+    cursor = get_cursor(1, 3)
     cursor.set_initial_state({})
-def test_instantiate_with_negative_values() -> None:
-    with pytest.raises(ValueError):
-        DefaultFileBasedCursor(-1, 3)
-    with pytest.raises(ValueError):
-        DefaultFileBasedCursor(1, -3)
+def get_cursor(max_history_size: int, days_to_sync_if_history_is_full: int) -> DefaultFileBasedCursor:
+    cursor_cls = DefaultFileBasedCursor
+    cursor_cls.DEFAULT_MAX_HISTORY_SIZE = max_history_size
+    config = FileBasedStreamConfig(
+        file_type="csv", name="test", validation_policy="emit_records", days_to_sync_if_history_is_full=days_to_sync_if_history_is_full)
+    return cursor_cls(config)

unit_tests/sources/file_based/test_scenarios.py CHANGED Viewed

@@ -34,10 +34,25 @@ from unit_tests.sources.file_based.scenarios.check_scenarios import (
     success_user_provided_schema_scenario,
 )
 from unit_tests.sources.file_based.scenarios.csv_scenarios import (
+    csv_autogenerate_column_names_scenario,
+    csv_custom_bool_values_scenario,
+    csv_custom_delimiter_in_double_quotes_scenario,
+    csv_custom_delimiter_with_escape_char_scenario,
     csv_custom_format_scenario,
+    csv_custom_null_values_scenario,
+    csv_double_quote_is_set_scenario,
+    csv_escape_char_is_set_scenario,
     csv_legacy_format_scenario,
     csv_multi_stream_scenario,
+    csv_newline_in_values_not_quoted_scenario,
+    csv_newline_in_values_quoted_value_scenario,
     csv_single_stream_scenario,
+    csv_skip_after_header_scenario,
+    csv_skip_before_and_after_header_scenario,
+    csv_skip_before_header_scenario,
+    csv_string_can_be_null_with_input_schemas_scenario,
+    csv_string_not_null_if_no_null_values_scenario,
+    csv_strings_can_be_null_not_quoted_scenario,
     empty_schema_inference_scenario,
     invalid_csv_scenario,
     multi_csv_scenario,
@@ -162,11 +177,26 @@ discover_scenarios = [
     jsonl_user_input_schema_scenario,
     schemaless_jsonl_scenario,
     schemaless_jsonl_multi_stream_scenario,
+    csv_string_can_be_null_with_input_schemas_scenario,
+    csv_string_not_null_if_no_null_values_scenario,
+    csv_strings_can_be_null_not_quoted_scenario,
+    csv_newline_in_values_quoted_value_scenario,
+    csv_escape_char_is_set_scenario,
+    csv_double_quote_is_set_scenario,
+    csv_custom_delimiter_with_escape_char_scenario,
+    csv_custom_delimiter_in_double_quotes_scenario,
+    csv_skip_before_header_scenario,
+    csv_skip_after_header_scenario,
+    csv_skip_before_and_after_header_scenario,
+    csv_custom_bool_values_scenario,
+    csv_custom_null_values_scenario,
     single_avro_scenario,
     avro_all_types_scenario,
     multiple_avro_combine_schema_scenario,
     multiple_streams_avro_scenario,
     avro_file_with_decimal_as_float_scenario,
+    csv_newline_in_values_not_quoted_scenario,
+    csv_autogenerate_column_names_scenario,
 ]

{airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl

airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl