airbyte-cdk 0.68.4__py3-none-any.whl → 0.69.1__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (30)
  1. airbyte_cdk/entrypoint.py +27 -7
  2. airbyte_cdk/sources/connector_state_manager.py +0 -1
  3. airbyte_cdk/sources/file_based/file_based_source.py +4 -2
  4. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +2 -2
  5. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +2 -2
  6. airbyte_cdk/sources/file_based/stream/concurrent/cursor/{file_based_noop_cursor.py → file_based_final_state_cursor.py} +21 -6
  7. airbyte_cdk/sources/streams/concurrent/adapters.py +2 -2
  8. airbyte_cdk/sources/streams/concurrent/cursor.py +27 -3
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +7 -3
  10. airbyte_cdk/test/entrypoint_wrapper.py +1 -1
  11. airbyte_cdk/utils/message_utils.py +17 -0
  12. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/RECORD +30 -28
  14. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/WHEEL +1 -1
  15. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -2
  16. unit_tests/sources/file_based/scenarios/csv_scenarios.py +128 -37
  17. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +3 -3
  18. unit_tests/sources/file_based/test_file_based_scenarios.py +13 -6
  19. unit_tests/sources/file_based/test_scenarios.py +32 -3
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +16 -14
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +5 -4
  23. unit_tests/sources/streams/concurrent/test_default_stream.py +8 -6
  24. unit_tests/sources/streams/test_stream_read.py +3 -2
  25. unit_tests/sources/test_concurrent_source.py +7 -5
  26. unit_tests/sources/test_source_read.py +2 -3
  27. unit_tests/test/test_entrypoint_wrapper.py +9 -6
  28. unit_tests/utils/test_message_utils.py +91 -0
  29. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/top_level.txt +0 -0
@@ -467,30 +467,24 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
467
467
  )
468
468
  ).build()
469
469
 
470
- multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
470
+ csv_analytics_scenario: TestScenario[InMemoryFilesSource] = (
471
471
  TestScenarioBuilder[InMemoryFilesSource]()
472
- .set_name("multi_format_analytics")
472
+ .set_name("csv_analytics")
473
473
  .set_config(
474
474
  {
475
475
  "streams": [
476
476
  {
477
477
  "name": "stream1",
478
478
  "format": {"filetype": "csv"},
479
- "globs": ["file1.csv"],
479
+ "globs": ["a.csv"],
480
480
  "validation_policy": "Emit Record",
481
481
  },
482
482
  {
483
483
  "name": "stream2",
484
484
  "format": {"filetype": "csv"},
485
- "globs": ["file2.csv"],
486
- "validation_policy": "Emit Record",
487
- },
488
- {
489
- "name": "stream3",
490
- "format": {"filetype": "jsonl"},
491
- "globs": ["file3.jsonl"],
485
+ "globs": ["b.csv"],
492
486
  "validation_policy": "Emit Record",
493
- },
487
+ }
494
488
  ]
495
489
  }
496
490
  )
@@ -498,17 +492,21 @@ multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
498
492
  FileBasedSourceBuilder()
499
493
  .set_files(
500
494
  {
501
- "file1.csv": {
502
- "contents": [],
495
+ "a.csv": {
496
+ "contents": [
497
+ ("col1", "col2"),
498
+ ("val11a", "val12a"),
499
+ ("val21a", "val22a"),
500
+ ],
503
501
  "last_modified": "2023-06-05T03:54:07.000Z",
504
502
  },
505
- "file2.csv": {
506
- "contents": [],
507
- "last_modified": "2023-06-06T03:54:07.000Z",
508
- },
509
- "file3.jsonl": {
510
- "contents": [],
511
- "last_modified": "2023-06-07T03:54:07.000Z",
503
+ "b.csv": {
504
+ "contents": [
505
+ ("col1", "col2", "col3"),
506
+ ("val11b", "val12b", "val13b"),
507
+ ("val21b", "val22b", "val23b"),
508
+ ],
509
+ "last_modified": "2023-06-05T03:54:07.000Z",
512
510
  },
513
511
  }
514
512
  )
@@ -521,7 +519,12 @@ multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
521
519
  "default_cursor_field": ["_ab_source_file_last_modified"],
522
520
  "json_schema": {
523
521
  "type": "object",
524
- "properties": {},
522
+ "properties": {
523
+ "col1": {"type": ["null", "string"]},
524
+ "col2": {"type": ["null", "string"]},
525
+ "_ab_source_file_last_modified": {"type": "string"},
526
+ "_ab_source_file_url": {"type": "string"},
527
+ },
525
528
  },
526
529
  "name": "stream1",
527
530
  "source_defined_cursor": True,
@@ -531,30 +534,64 @@ multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
531
534
  "default_cursor_field": ["_ab_source_file_last_modified"],
532
535
  "json_schema": {
533
536
  "type": "object",
534
- "properties": {},
537
+ "properties": {
538
+ "col1": {"type": ["null", "string"]},
539
+ "col2": {"type": ["null", "string"]},
540
+ "col3": {"type": ["null", "string"]},
541
+ "_ab_source_file_last_modified": {"type": "string"},
542
+ "_ab_source_file_url": {"type": "string"},
543
+ },
535
544
  },
536
545
  "name": "stream2",
537
546
  "source_defined_cursor": True,
538
547
  "supported_sync_modes": ["full_refresh", "incremental"],
539
- },
540
- {
541
- "default_cursor_field": ["_ab_source_file_last_modified"],
542
- "json_schema": {
543
- "type": "object",
544
- "properties": {},
545
- },
546
- "name": "stream3",
547
- "source_defined_cursor": True,
548
- "supported_sync_modes": ["full_refresh", "incremental"],
549
- },
548
+ }
550
549
  ]
551
550
  }
552
551
  )
553
- .set_expected_records([])
552
+ .set_expected_records([
553
+ {
554
+ "data": {
555
+ "col1": "val11a",
556
+ "col2": "val12a",
557
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
558
+ "_ab_source_file_url": "a.csv",
559
+ },
560
+ "stream": "stream1",
561
+ },
562
+ {
563
+ "data": {
564
+ "col1": "val21a",
565
+ "col2": "val22a",
566
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
567
+ "_ab_source_file_url": "a.csv",
568
+ },
569
+ "stream": "stream1",
570
+ },
571
+ {
572
+ "data": {
573
+ "col1": "val11b",
574
+ "col2": "val12b",
575
+ "col3": "val13b",
576
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
577
+ "_ab_source_file_url": "b.csv",
578
+ },
579
+ "stream": "stream2",
580
+ },
581
+ {
582
+ "data": {
583
+ "col1": "val21b",
584
+ "col2": "val22b",
585
+ "col3": "val23b",
586
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
587
+ "_ab_source_file_url": "b.csv",
588
+ },
589
+ "stream": "stream2",
590
+ },
591
+ ])
554
592
  .set_expected_analytics(
555
593
  [
556
594
  AirbyteAnalyticsTraceMessage(type="file-cdk-csv-stream-count", value="2"),
557
- AirbyteAnalyticsTraceMessage(type="file-cdk-jsonl-stream-count", value="1"),
558
595
  ]
559
596
  )
560
597
  ).build()
@@ -1450,7 +1487,6 @@ empty_schema_inference_scenario: TestScenario[InMemoryFilesSource] = (
1450
1487
  }
1451
1488
  )
1452
1489
  .set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
1453
- .set_expected_records([])
1454
1490
  ).build()
1455
1491
 
1456
1492
  schemaless_csv_scenario: TestScenario[InMemoryFilesSource] = (
@@ -3009,6 +3045,61 @@ earlier_csv_scenario: TestScenario[InMemoryFilesSource] = (
3009
3045
  ]
3010
3046
  }
3011
3047
  )
3012
- .set_expected_records([])
3013
3048
  .set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
3014
3049
  ).build()
3050
+
3051
+ csv_no_records_scenario: TestScenario[InMemoryFilesSource] = (
3052
+ TestScenarioBuilder[InMemoryFilesSource]()
3053
+ .set_name("csv_empty_no_records")
3054
+ .set_config(
3055
+ {
3056
+ "streams": [
3057
+ {
3058
+ "name": "stream1",
3059
+ "globs": ["*"],
3060
+ "validation_policy": "Emit Record",
3061
+ "input_schema": '{"col1": "boolean", "col2": "string"}',
3062
+ "format": {
3063
+ "filetype": "csv",
3064
+ "null_values": ["null"],
3065
+ },
3066
+ }
3067
+ ],
3068
+ "start_date": "2023-06-04T03:54:07.000000Z",
3069
+ }
3070
+ )
3071
+ .set_source_builder(
3072
+ FileBasedSourceBuilder()
3073
+ .set_files(
3074
+ {
3075
+ "a.csv": {
3076
+ "contents": [("col1", "col2")], # column headers, but no data rows
3077
+ "last_modified": "2023-06-05T03:54:07.000Z",
3078
+ }
3079
+ }
3080
+ )
3081
+ .set_file_type("csv")
3082
+ )
3083
+ .set_expected_catalog(
3084
+ {
3085
+ "streams": [
3086
+ {
3087
+ "default_cursor_field": ["_ab_source_file_last_modified"],
3088
+ "json_schema": {
3089
+ "type": "object",
3090
+ "properties": {
3091
+ "col1": {"type": "boolean"},
3092
+ "col2": {"type": "string"},
3093
+ "_ab_source_file_last_modified": {"type": "string"},
3094
+ "_ab_source_file_url": {"type": "string"},
3095
+ },
3096
+ },
3097
+ "name": "stream1",
3098
+ "source_defined_cursor": True,
3099
+ "supported_sync_modes": ["full_refresh", "incremental"],
3100
+ }
3101
+ ]
3102
+ }
3103
+ )
3104
+ .set_expected_records([])
3105
+ ).build()
@@ -23,7 +23,7 @@ from airbyte_cdk.sources.file_based.stream.concurrent.adapters import (
23
23
  FileBasedStreamPartition,
24
24
  FileBasedStreamPartitionGenerator,
25
25
  )
26
- from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedNoopCursor
26
+ from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedFinalStateCursor
27
27
  from airbyte_cdk.sources.message import InMemoryMessageRepository
28
28
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
29
29
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
@@ -36,7 +36,7 @@ _ANY_SYNC_MODE = SyncMode.full_refresh
36
36
  _ANY_STATE = {"state_key": "state_value"}
37
37
  _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
38
38
  _STREAM_NAME = "stream"
39
- _ANY_CURSOR = Mock(spec=FileBasedNoopCursor)
39
+ _ANY_CURSOR = Mock(spec=FileBasedFinalStateCursor)
40
40
 
41
41
 
42
42
  @pytest.mark.parametrize(
@@ -165,7 +165,7 @@ class StreamFacadeTest(unittest.TestCase):
165
165
  supported_sync_modes=[SyncMode.full_refresh],
166
166
  )
167
167
  self._legacy_stream = DefaultFileBasedStream(
168
- cursor=FileBasedNoopCursor(MagicMock()),
168
+ cursor=FileBasedFinalStateCursor(stream_config=MagicMock(), stream_namespace=None, message_repository=Mock()),
169
169
  config=FileBasedStreamConfig(name="stream", format=CsvFormat()),
170
170
  catalog_schema={},
171
171
  stream_reader=MagicMock(),
@@ -50,6 +50,7 @@ from unit_tests.sources.file_based.scenarios.concurrent_incremental_scenarios im
50
50
  single_csv_no_input_state_scenario_concurrent,
51
51
  )
52
52
  from unit_tests.sources.file_based.scenarios.csv_scenarios import (
53
+ csv_analytics_scenario,
53
54
  csv_autogenerate_column_names_scenario,
54
55
  csv_custom_bool_values_scenario,
55
56
  csv_custom_delimiter_in_double_quotes_scenario,
@@ -61,6 +62,7 @@ from unit_tests.sources.file_based.scenarios.csv_scenarios import (
61
62
  csv_multi_stream_scenario,
62
63
  csv_newline_in_values_not_quoted_scenario,
63
64
  csv_newline_in_values_quoted_value_scenario,
65
+ csv_no_records_scenario,
64
66
  csv_single_stream_scenario,
65
67
  csv_skip_after_header_scenario,
66
68
  csv_skip_before_and_after_header_scenario,
@@ -75,7 +77,6 @@ from unit_tests.sources.file_based.scenarios.csv_scenarios import (
75
77
  invalid_csv_scenario,
76
78
  multi_csv_scenario,
77
79
  multi_csv_stream_n_file_exceeds_limit_for_inference,
78
- multi_format_analytics_scenario,
79
80
  multi_stream_custom_format,
80
81
  schemaless_csv_multi_stream_scenario,
81
82
  schemaless_csv_scenario,
@@ -152,7 +153,13 @@ from unit_tests.sources.file_based.scenarios.validation_policy_scenarios import
152
153
  )
153
154
  from unit_tests.sources.file_based.test_scenarios import verify_check, verify_discover, verify_read, verify_spec
154
155
 
155
- discover_scenarios = [
156
+ discover_failure_scenarios = [
157
+ earlier_csv_scenario,
158
+ empty_schema_inference_scenario,
159
+ ]
160
+
161
+ discover_success_scenarios = [
162
+ csv_no_records_scenario,
156
163
  csv_multi_stream_scenario,
157
164
  csv_single_stream_scenario,
158
165
  invalid_csv_scenario,
@@ -176,9 +183,7 @@ discover_scenarios = [
176
183
  single_csv_file_is_skipped_if_same_modified_at_as_in_history,
177
184
  single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history,
178
185
  csv_custom_format_scenario,
179
- earlier_csv_scenario,
180
186
  multi_stream_custom_format,
181
- empty_schema_inference_scenario,
182
187
  single_parquet_scenario,
183
188
  multi_parquet_scenario,
184
189
  parquet_various_types_scenario,
@@ -260,12 +265,14 @@ discover_scenarios = [
260
265
  single_csv_no_input_state_scenario_concurrent,
261
266
  ]
262
267
 
263
- read_scenarios = discover_scenarios + [
268
+ discover_scenarios = discover_failure_scenarios + discover_success_scenarios
269
+
270
+ read_scenarios = discover_success_scenarios + [
264
271
  emit_record_scenario_multi_stream,
265
272
  emit_record_scenario_single_stream,
266
273
  skip_record_scenario_multi_stream,
267
274
  skip_record_scenario_single_stream,
268
- multi_format_analytics_scenario,
275
+ csv_analytics_scenario,
269
276
  wait_for_rediscovery_scenario_multi_stream,
270
277
  wait_for_rediscovery_scenario_single_stream,
271
278
  ]
@@ -16,6 +16,7 @@ from airbyte_cdk.sources import AbstractSource
16
16
  from airbyte_cdk.sources.file_based.stream.concurrent.cursor import AbstractConcurrentFileBasedCursor
17
17
  from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput
18
18
  from airbyte_cdk.test.entrypoint_wrapper import read as entrypoint_read
19
+ from airbyte_cdk.utils import message_utils
19
20
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
20
21
  from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog
21
22
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
@@ -71,7 +72,7 @@ def assert_exception(expected_exception: type[BaseException], output: Entrypoint
71
72
 
72
73
 
73
74
  def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[AbstractSource]) -> None:
74
- records, log_messages = output.records_and_state_messages, output.logs
75
+ records_and_state_messages, log_messages = output.records_and_state_messages, output.logs
75
76
  logs = [message.log for message in log_messages if message.log.level.value in scenario.log_levels]
76
77
  if scenario.expected_records is None:
77
78
  return
@@ -85,7 +86,7 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac
85
86
  ),
86
87
  )
87
88
  sorted_records = sorted(
88
- filter(lambda r: r.record, records),
89
+ filter(lambda r: r.record, records_and_state_messages),
89
90
  key=lambda record: ",".join(
90
91
  f"{k}={v}" for k, v in sorted(record.record.data.items(), key=lambda items: (items[0], items[1])) if k != "emitted_at"
91
92
  ),
@@ -104,7 +105,9 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac
104
105
  assert actual.record.stream == expected["stream"]
105
106
 
106
107
  expected_states = list(filter(lambda e: "data" not in e, expected_records))
107
- states = list(filter(lambda r: r.state, records))
108
+ states = list(filter(lambda r: r.state, records_and_state_messages))
109
+ assert len(states) > 0, "No state messages emitted. Successful syncs should emit at least one stream state."
110
+ _verify_state_record_counts(sorted_records, states)
108
111
 
109
112
  if hasattr(scenario.source, "cursor_cls") and issubclass(scenario.source.cursor_cls, AbstractConcurrentFileBasedCursor):
110
113
  # Only check the last state emitted because we don't know the order the others will be in.
@@ -125,8 +128,34 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac
125
128
  _verify_analytics(analytics, scenario.expected_analytics)
126
129
 
127
130
 
131
+ def _verify_state_record_counts(records: List[AirbyteMessage], states: List[AirbyteMessage]) -> None:
132
+ actual_record_counts = {}
133
+ for record in records:
134
+ stream_descriptor = message_utils.get_stream_descriptor(record)
135
+ actual_record_counts[stream_descriptor] = actual_record_counts.get(stream_descriptor, 0) + 1
136
+
137
+ state_record_count_sums = {}
138
+ for state_message in states:
139
+ stream_descriptor = message_utils.get_stream_descriptor(state_message)
140
+ state_record_count_sums[stream_descriptor] = (
141
+ state_record_count_sums.get(stream_descriptor, 0)
142
+ + state_message.state.sourceStats.recordCount
143
+ )
144
+
145
+ for stream, actual_count in actual_record_counts.items():
146
+ assert state_record_count_sums.get(stream) == actual_count
147
+
148
+ # We can have extra keys in state_record_count_sums if we processed a stream and reported 0 records
149
+ extra_keys = state_record_count_sums.keys() - actual_record_counts.keys()
150
+ for stream in extra_keys:
151
+ assert state_record_count_sums[stream] == 0
152
+
153
+
128
154
  def _verify_analytics(analytics: List[AirbyteMessage], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]]) -> None:
129
155
  if expected_analytics:
156
+ assert len(analytics) == len(
157
+ expected_analytics), \
158
+ f"Number of actual analytics messages ({len(analytics)}) did not match expected ({len(expected_analytics)})"
130
159
  for actual, expected in zip(analytics, expected_analytics):
131
160
  actual_type, actual_value = actual.trace.analytics.type, actual.trace.analytics.value
132
161
  expected_type = expected.type
@@ -21,7 +21,7 @@ from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageReposi
21
21
  from airbyte_cdk.sources.source import TState
22
22
  from airbyte_cdk.sources.streams import Stream
23
23
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
24
- from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
24
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, FinalStateCursor
25
25
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import EpochValueConcurrentStreamStateConverter
26
26
  from airbyte_protocol.models import ConfiguredAirbyteStream
27
27
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
@@ -83,7 +83,7 @@ class StreamFacadeSource(ConcurrentSourceAdapter):
83
83
  None,
84
84
  )
85
85
  if self._cursor_field
86
- else NoopCursor(),
86
+ else FinalStateCursor(stream_name=stream.name, stream_namespace=stream.namespace, message_repository=self.message_repository),
87
87
  )
88
88
  for stream, state in zip(self._streams, stream_states)
89
89
  ]
@@ -4,7 +4,7 @@
4
4
  import logging
5
5
 
6
6
  from airbyte_cdk.sources.message import InMemoryMessageRepository
7
- from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
7
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
8
8
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
9
9
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
10
10
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
@@ -15,6 +15,8 @@ from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_str
15
15
  InMemoryPartitionGenerator,
16
16
  )
17
17
 
18
+ _message_repository = InMemoryMessageRepository()
19
+
18
20
  _id_only_stream = DefaultStream(
19
21
  partition_generator=InMemoryPartitionGenerator(
20
22
  [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
@@ -30,7 +32,7 @@ _id_only_stream = DefaultStream(
30
32
  primary_key=[],
31
33
  cursor_field=None,
32
34
  logger=logging.getLogger("test_logger"),
33
- cursor=NoopCursor(),
35
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
34
36
  )
35
37
 
36
38
  _id_only_stream_with_slice_logger = DefaultStream(
@@ -48,7 +50,7 @@ _id_only_stream_with_slice_logger = DefaultStream(
48
50
  primary_key=[],
49
51
  cursor_field=None,
50
52
  logger=logging.getLogger("test_logger"),
51
- cursor=NoopCursor(),
53
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
52
54
  )
53
55
 
54
56
  _id_only_stream_with_primary_key = DefaultStream(
@@ -66,7 +68,7 @@ _id_only_stream_with_primary_key = DefaultStream(
66
68
  primary_key=["id"],
67
69
  cursor_field=None,
68
70
  logger=logging.getLogger("test_logger"),
69
- cursor=NoopCursor(),
71
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
70
72
  )
71
73
 
72
74
  _id_only_stream_multiple_partitions = DefaultStream(
@@ -87,7 +89,7 @@ _id_only_stream_multiple_partitions = DefaultStream(
87
89
  primary_key=[],
88
90
  cursor_field=None,
89
91
  logger=logging.getLogger("test_logger"),
90
- cursor=NoopCursor(),
92
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
91
93
  )
92
94
 
93
95
  _id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
@@ -108,7 +110,7 @@ _id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
108
110
  primary_key=[],
109
111
  cursor_field=None,
110
112
  logger=logging.getLogger("test_logger"),
111
- cursor=NoopCursor(),
113
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
112
114
  )
113
115
 
114
116
  _stream_raising_exception = DefaultStream(
@@ -126,7 +128,7 @@ _stream_raising_exception = DefaultStream(
126
128
  primary_key=[],
127
129
  cursor_field=None,
128
130
  logger=logging.getLogger("test_logger"),
129
- cursor=NoopCursor(),
131
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
130
132
  )
131
133
 
132
134
  test_concurrent_cdk_single_stream = (
@@ -140,7 +142,7 @@ test_concurrent_cdk_single_stream = (
140
142
  _id_only_stream,
141
143
  ]
142
144
  )
143
- .set_message_repository(InMemoryMessageRepository())
145
+ .set_message_repository(_message_repository)
144
146
  )
145
147
  .set_expected_records(
146
148
  [
@@ -193,7 +195,7 @@ test_concurrent_cdk_single_stream_with_primary_key = (
193
195
  _id_only_stream_with_primary_key,
194
196
  ]
195
197
  )
196
- .set_message_repository(InMemoryMessageRepository())
198
+ .set_message_repository(_message_repository)
197
199
  )
198
200
  .set_expected_records(
199
201
  [
@@ -253,11 +255,11 @@ test_concurrent_cdk_multiple_streams = (
253
255
  primary_key=[],
254
256
  cursor_field=None,
255
257
  logger=logging.getLogger("test_logger"),
256
- cursor=NoopCursor(),
258
+ cursor=FinalStateCursor(stream_name="stream2", stream_namespace=None, message_repository=_message_repository),
257
259
  ),
258
260
  ]
259
261
  )
260
- .set_message_repository(InMemoryMessageRepository())
262
+ .set_message_repository(_message_repository)
261
263
  )
262
264
  .set_expected_records(
263
265
  [
@@ -308,7 +310,7 @@ test_concurrent_cdk_partition_raises_exception = (
308
310
  _stream_raising_exception,
309
311
  ]
310
312
  )
311
- .set_message_repository(InMemoryMessageRepository())
313
+ .set_message_repository(_message_repository)
312
314
  )
313
315
  .set_expected_records(
314
316
  [
@@ -346,7 +348,7 @@ test_concurrent_cdk_single_stream_multiple_partitions = (
346
348
  _id_only_stream_multiple_partitions,
347
349
  ]
348
350
  )
349
- .set_message_repository(InMemoryMessageRepository())
351
+ .set_message_repository(_message_repository)
350
352
  )
351
353
  .set_expected_records(
352
354
  [
@@ -386,7 +388,7 @@ test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two = (
386
388
  _id_only_stream_multiple_partitions_concurrency_level_two,
387
389
  ]
388
390
  )
389
- .set_message_repository(InMemoryMessageRepository())
391
+ .set_message_repository(_message_repository)
390
392
  )
391
393
  .set_expected_records(
392
394
  [
@@ -8,11 +8,11 @@ from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
8
8
  from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
9
9
  from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
10
10
  from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
11
- from airbyte_cdk.sources.message import MessageRepository
11
+ from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
12
12
  from airbyte_cdk.sources.streams import Stream
13
13
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
15
- from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
16
16
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
17
17
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
18
18
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -42,13 +42,14 @@ class ConcurrentCdkSource(ConcurrentSourceAdapter):
42
42
  concurrent_source = ConcurrentSource.create(1, 1, streams[0]._logger, NeverLogSliceLogger(), message_repository)
43
43
  super().__init__(concurrent_source)
44
44
  self._streams = streams
45
+ self._message_repository = message_repository
45
46
 
46
47
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
47
48
  # Check is not verified because it is up to the source to implement this method
48
49
  return True, None
49
50
 
50
51
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
51
- return [StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger) for s in self._streams]
52
+ return [StreamFacade(s, LegacyStream(), FinalStateCursor(stream_name=s.name, stream_namespace=s.namespace, message_repository=self.message_repository), NeverLogSliceLogger(), s._logger) for s in self._streams]
52
53
 
53
54
  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
54
55
  return ConnectorSpecification(connectionSpecification={})
@@ -57,7 +58,7 @@ class ConcurrentCdkSource(ConcurrentSourceAdapter):
57
58
  return ConfiguredAirbyteCatalog(
58
59
  streams=[
59
60
  ConfiguredAirbyteStream(
60
- stream=StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
61
+ stream=StreamFacade(s, LegacyStream(), FinalStateCursor(stream_name=s.name, stream_namespace=s.namespace, message_repository=InMemoryMessageRepository()), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
61
62
  sync_mode=SyncMode.full_refresh,
62
63
  destination_sync_mode=DestinationSyncMode.overwrite,
63
64
  )
@@ -5,8 +5,9 @@ import unittest
5
5
  from unittest.mock import Mock
6
6
 
7
7
  from airbyte_cdk.models import AirbyteStream, SyncMode
8
+ from airbyte_cdk.sources.message import InMemoryMessageRepository
8
9
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE
9
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
10
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
10
11
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
11
12
 
12
13
 
@@ -20,6 +21,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
20
21
  self._cursor_field = None
21
22
  self._logger = Mock()
22
23
  self._cursor = Mock(spec=Cursor)
24
+ self._message_repository = InMemoryMessageRepository()
23
25
  self._stream = DefaultStream(
24
26
  self._partition_generator,
25
27
  self._name,
@@ -28,7 +30,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
28
30
  self._primary_key,
29
31
  self._cursor_field,
30
32
  self._logger,
31
- NoopCursor(),
33
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
32
34
  )
33
35
 
34
36
  def test_get_json_schema(self):
@@ -89,7 +91,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
89
91
  ["id"],
90
92
  self._cursor_field,
91
93
  self._logger,
92
- NoopCursor(),
94
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
93
95
  )
94
96
 
95
97
  expected_airbyte_stream = AirbyteStream(
@@ -121,7 +123,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
121
123
  ["id_a", "id_b"],
122
124
  self._cursor_field,
123
125
  self._logger,
124
- NoopCursor(),
126
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
125
127
  )
126
128
 
127
129
  expected_airbyte_stream = AirbyteStream(
@@ -153,7 +155,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
153
155
  self._primary_key,
154
156
  "date",
155
157
  self._logger,
156
- NoopCursor(),
158
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
157
159
  )
158
160
 
159
161
  expected_airbyte_stream = AirbyteStream(
@@ -178,7 +180,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
178
180
  self._primary_key,
179
181
  self._cursor_field,
180
182
  self._logger,
181
- NoopCursor(),
183
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
182
184
  namespace="test",
183
185
  )
184
186
  expected_airbyte_stream = AirbyteStream(
@@ -26,7 +26,7 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
26
26
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
27
27
  from airbyte_cdk.sources.streams import Stream
28
28
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
29
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
29
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
30
30
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
31
31
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
32
32
  from airbyte_cdk.sources.streams.core import StreamData
@@ -105,8 +105,9 @@ def _stream(slice_to_partition_mapping, slice_logger, logger, message_repository
105
105
  return _MockStream(slice_to_partition_mapping)
106
106
 
107
107
 
108
- def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message_repository, cursor: Cursor = NoopCursor()):
108
+ def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message_repository, cursor: Optional[Cursor] = None):
109
109
  stream = _stream(slice_to_partition_mapping, slice_logger, logger, message_repository)
110
+ cursor = cursor or FinalStateCursor(stream_name=stream.name, stream_namespace=stream.namespace, message_repository=message_repository)
110
111
  source = Mock()
111
112
  source._slice_logger = slice_logger
112
113
  source.message_repository = message_repository