airbyte-cdk 0.68.4__py3-none-any.whl → 0.69.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30)
  1. airbyte_cdk/entrypoint.py +27 -7
  2. airbyte_cdk/sources/connector_state_manager.py +0 -1
  3. airbyte_cdk/sources/file_based/file_based_source.py +4 -2
  4. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +2 -2
  5. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +2 -2
  6. airbyte_cdk/sources/file_based/stream/concurrent/cursor/{file_based_noop_cursor.py → file_based_final_state_cursor.py} +21 -6
  7. airbyte_cdk/sources/streams/concurrent/adapters.py +2 -2
  8. airbyte_cdk/sources/streams/concurrent/cursor.py +27 -3
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +7 -3
  10. airbyte_cdk/test/entrypoint_wrapper.py +1 -1
  11. airbyte_cdk/utils/message_utils.py +17 -0
  12. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/RECORD +30 -28
  14. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/WHEEL +1 -1
  15. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -2
  16. unit_tests/sources/file_based/scenarios/csv_scenarios.py +128 -37
  17. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +3 -3
  18. unit_tests/sources/file_based/test_file_based_scenarios.py +13 -6
  19. unit_tests/sources/file_based/test_scenarios.py +32 -3
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +16 -14
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +5 -4
  23. unit_tests/sources/streams/concurrent/test_default_stream.py +8 -6
  24. unit_tests/sources/streams/test_stream_read.py +3 -2
  25. unit_tests/sources/test_concurrent_source.py +7 -5
  26. unit_tests/sources/test_source_read.py +2 -3
  27. unit_tests/test/test_entrypoint_wrapper.py +9 -6
  28. unit_tests/utils/test_message_utils.py +91 -0
  29. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/top_level.txt +0 -0
@@ -467,30 +467,24 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
467
467
  )
468
468
  ).build()
469
469
 
470
- multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
470
+ csv_analytics_scenario: TestScenario[InMemoryFilesSource] = (
471
471
  TestScenarioBuilder[InMemoryFilesSource]()
472
- .set_name("multi_format_analytics")
472
+ .set_name("csv_analytics")
473
473
  .set_config(
474
474
  {
475
475
  "streams": [
476
476
  {
477
477
  "name": "stream1",
478
478
  "format": {"filetype": "csv"},
479
- "globs": ["file1.csv"],
479
+ "globs": ["a.csv"],
480
480
  "validation_policy": "Emit Record",
481
481
  },
482
482
  {
483
483
  "name": "stream2",
484
484
  "format": {"filetype": "csv"},
485
- "globs": ["file2.csv"],
486
- "validation_policy": "Emit Record",
487
- },
488
- {
489
- "name": "stream3",
490
- "format": {"filetype": "jsonl"},
491
- "globs": ["file3.jsonl"],
485
+ "globs": ["b.csv"],
492
486
  "validation_policy": "Emit Record",
493
- },
487
+ }
494
488
  ]
495
489
  }
496
490
  )
@@ -498,17 +492,21 @@ multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
498
492
  FileBasedSourceBuilder()
499
493
  .set_files(
500
494
  {
501
- "file1.csv": {
502
- "contents": [],
495
+ "a.csv": {
496
+ "contents": [
497
+ ("col1", "col2"),
498
+ ("val11a", "val12a"),
499
+ ("val21a", "val22a"),
500
+ ],
503
501
  "last_modified": "2023-06-05T03:54:07.000Z",
504
502
  },
505
- "file2.csv": {
506
- "contents": [],
507
- "last_modified": "2023-06-06T03:54:07.000Z",
508
- },
509
- "file3.jsonl": {
510
- "contents": [],
511
- "last_modified": "2023-06-07T03:54:07.000Z",
503
+ "b.csv": {
504
+ "contents": [
505
+ ("col1", "col2", "col3"),
506
+ ("val11b", "val12b", "val13b"),
507
+ ("val21b", "val22b", "val23b"),
508
+ ],
509
+ "last_modified": "2023-06-05T03:54:07.000Z",
512
510
  },
513
511
  }
514
512
  )
@@ -521,7 +519,12 @@ multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
521
519
  "default_cursor_field": ["_ab_source_file_last_modified"],
522
520
  "json_schema": {
523
521
  "type": "object",
524
- "properties": {},
522
+ "properties": {
523
+ "col1": {"type": ["null", "string"]},
524
+ "col2": {"type": ["null", "string"]},
525
+ "_ab_source_file_last_modified": {"type": "string"},
526
+ "_ab_source_file_url": {"type": "string"},
527
+ },
525
528
  },
526
529
  "name": "stream1",
527
530
  "source_defined_cursor": True,
@@ -531,30 +534,64 @@ multi_format_analytics_scenario: TestScenario[InMemoryFilesSource] = (
531
534
  "default_cursor_field": ["_ab_source_file_last_modified"],
532
535
  "json_schema": {
533
536
  "type": "object",
534
- "properties": {},
537
+ "properties": {
538
+ "col1": {"type": ["null", "string"]},
539
+ "col2": {"type": ["null", "string"]},
540
+ "col3": {"type": ["null", "string"]},
541
+ "_ab_source_file_last_modified": {"type": "string"},
542
+ "_ab_source_file_url": {"type": "string"},
543
+ },
535
544
  },
536
545
  "name": "stream2",
537
546
  "source_defined_cursor": True,
538
547
  "supported_sync_modes": ["full_refresh", "incremental"],
539
- },
540
- {
541
- "default_cursor_field": ["_ab_source_file_last_modified"],
542
- "json_schema": {
543
- "type": "object",
544
- "properties": {},
545
- },
546
- "name": "stream3",
547
- "source_defined_cursor": True,
548
- "supported_sync_modes": ["full_refresh", "incremental"],
549
- },
548
+ }
550
549
  ]
551
550
  }
552
551
  )
553
- .set_expected_records([])
552
+ .set_expected_records([
553
+ {
554
+ "data": {
555
+ "col1": "val11a",
556
+ "col2": "val12a",
557
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
558
+ "_ab_source_file_url": "a.csv",
559
+ },
560
+ "stream": "stream1",
561
+ },
562
+ {
563
+ "data": {
564
+ "col1": "val21a",
565
+ "col2": "val22a",
566
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
567
+ "_ab_source_file_url": "a.csv",
568
+ },
569
+ "stream": "stream1",
570
+ },
571
+ {
572
+ "data": {
573
+ "col1": "val11b",
574
+ "col2": "val12b",
575
+ "col3": "val13b",
576
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
577
+ "_ab_source_file_url": "b.csv",
578
+ },
579
+ "stream": "stream2",
580
+ },
581
+ {
582
+ "data": {
583
+ "col1": "val21b",
584
+ "col2": "val22b",
585
+ "col3": "val23b",
586
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
587
+ "_ab_source_file_url": "b.csv",
588
+ },
589
+ "stream": "stream2",
590
+ },
591
+ ])
554
592
  .set_expected_analytics(
555
593
  [
556
594
  AirbyteAnalyticsTraceMessage(type="file-cdk-csv-stream-count", value="2"),
557
- AirbyteAnalyticsTraceMessage(type="file-cdk-jsonl-stream-count", value="1"),
558
595
  ]
559
596
  )
560
597
  ).build()
@@ -1450,7 +1487,6 @@ empty_schema_inference_scenario: TestScenario[InMemoryFilesSource] = (
1450
1487
  }
1451
1488
  )
1452
1489
  .set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
1453
- .set_expected_records([])
1454
1490
  ).build()
1455
1491
 
1456
1492
  schemaless_csv_scenario: TestScenario[InMemoryFilesSource] = (
@@ -3009,6 +3045,61 @@ earlier_csv_scenario: TestScenario[InMemoryFilesSource] = (
3009
3045
  ]
3010
3046
  }
3011
3047
  )
3012
- .set_expected_records([])
3013
3048
  .set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
3014
3049
  ).build()
3050
+
3051
+ csv_no_records_scenario: TestScenario[InMemoryFilesSource] = (
3052
+ TestScenarioBuilder[InMemoryFilesSource]()
3053
+ .set_name("csv_empty_no_records")
3054
+ .set_config(
3055
+ {
3056
+ "streams": [
3057
+ {
3058
+ "name": "stream1",
3059
+ "globs": ["*"],
3060
+ "validation_policy": "Emit Record",
3061
+ "input_schema": '{"col1": "boolean", "col2": "string"}',
3062
+ "format": {
3063
+ "filetype": "csv",
3064
+ "null_values": ["null"],
3065
+ },
3066
+ }
3067
+ ],
3068
+ "start_date": "2023-06-04T03:54:07.000000Z",
3069
+ }
3070
+ )
3071
+ .set_source_builder(
3072
+ FileBasedSourceBuilder()
3073
+ .set_files(
3074
+ {
3075
+ "a.csv": {
3076
+ "contents": [("col1", "col2")], # column headers, but no data rows
3077
+ "last_modified": "2023-06-05T03:54:07.000Z",
3078
+ }
3079
+ }
3080
+ )
3081
+ .set_file_type("csv")
3082
+ )
3083
+ .set_expected_catalog(
3084
+ {
3085
+ "streams": [
3086
+ {
3087
+ "default_cursor_field": ["_ab_source_file_last_modified"],
3088
+ "json_schema": {
3089
+ "type": "object",
3090
+ "properties": {
3091
+ "col1": {"type": "boolean"},
3092
+ "col2": {"type": "string"},
3093
+ "_ab_source_file_last_modified": {"type": "string"},
3094
+ "_ab_source_file_url": {"type": "string"},
3095
+ },
3096
+ },
3097
+ "name": "stream1",
3098
+ "source_defined_cursor": True,
3099
+ "supported_sync_modes": ["full_refresh", "incremental"],
3100
+ }
3101
+ ]
3102
+ }
3103
+ )
3104
+ .set_expected_records([])
3105
+ ).build()
@@ -23,7 +23,7 @@ from airbyte_cdk.sources.file_based.stream.concurrent.adapters import (
23
23
  FileBasedStreamPartition,
24
24
  FileBasedStreamPartitionGenerator,
25
25
  )
26
- from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedNoopCursor
26
+ from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedFinalStateCursor
27
27
  from airbyte_cdk.sources.message import InMemoryMessageRepository
28
28
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
29
29
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
@@ -36,7 +36,7 @@ _ANY_SYNC_MODE = SyncMode.full_refresh
36
36
  _ANY_STATE = {"state_key": "state_value"}
37
37
  _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
38
38
  _STREAM_NAME = "stream"
39
- _ANY_CURSOR = Mock(spec=FileBasedNoopCursor)
39
+ _ANY_CURSOR = Mock(spec=FileBasedFinalStateCursor)
40
40
 
41
41
 
42
42
  @pytest.mark.parametrize(
@@ -165,7 +165,7 @@ class StreamFacadeTest(unittest.TestCase):
165
165
  supported_sync_modes=[SyncMode.full_refresh],
166
166
  )
167
167
  self._legacy_stream = DefaultFileBasedStream(
168
- cursor=FileBasedNoopCursor(MagicMock()),
168
+ cursor=FileBasedFinalStateCursor(stream_config=MagicMock(), stream_namespace=None, message_repository=Mock()),
169
169
  config=FileBasedStreamConfig(name="stream", format=CsvFormat()),
170
170
  catalog_schema={},
171
171
  stream_reader=MagicMock(),
@@ -50,6 +50,7 @@ from unit_tests.sources.file_based.scenarios.concurrent_incremental_scenarios im
50
50
  single_csv_no_input_state_scenario_concurrent,
51
51
  )
52
52
  from unit_tests.sources.file_based.scenarios.csv_scenarios import (
53
+ csv_analytics_scenario,
53
54
  csv_autogenerate_column_names_scenario,
54
55
  csv_custom_bool_values_scenario,
55
56
  csv_custom_delimiter_in_double_quotes_scenario,
@@ -61,6 +62,7 @@ from unit_tests.sources.file_based.scenarios.csv_scenarios import (
61
62
  csv_multi_stream_scenario,
62
63
  csv_newline_in_values_not_quoted_scenario,
63
64
  csv_newline_in_values_quoted_value_scenario,
65
+ csv_no_records_scenario,
64
66
  csv_single_stream_scenario,
65
67
  csv_skip_after_header_scenario,
66
68
  csv_skip_before_and_after_header_scenario,
@@ -75,7 +77,6 @@ from unit_tests.sources.file_based.scenarios.csv_scenarios import (
75
77
  invalid_csv_scenario,
76
78
  multi_csv_scenario,
77
79
  multi_csv_stream_n_file_exceeds_limit_for_inference,
78
- multi_format_analytics_scenario,
79
80
  multi_stream_custom_format,
80
81
  schemaless_csv_multi_stream_scenario,
81
82
  schemaless_csv_scenario,
@@ -152,7 +153,13 @@ from unit_tests.sources.file_based.scenarios.validation_policy_scenarios import
152
153
  )
153
154
  from unit_tests.sources.file_based.test_scenarios import verify_check, verify_discover, verify_read, verify_spec
154
155
 
155
- discover_scenarios = [
156
+ discover_failure_scenarios = [
157
+ earlier_csv_scenario,
158
+ empty_schema_inference_scenario,
159
+ ]
160
+
161
+ discover_success_scenarios = [
162
+ csv_no_records_scenario,
156
163
  csv_multi_stream_scenario,
157
164
  csv_single_stream_scenario,
158
165
  invalid_csv_scenario,
@@ -176,9 +183,7 @@ discover_scenarios = [
176
183
  single_csv_file_is_skipped_if_same_modified_at_as_in_history,
177
184
  single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history,
178
185
  csv_custom_format_scenario,
179
- earlier_csv_scenario,
180
186
  multi_stream_custom_format,
181
- empty_schema_inference_scenario,
182
187
  single_parquet_scenario,
183
188
  multi_parquet_scenario,
184
189
  parquet_various_types_scenario,
@@ -260,12 +265,14 @@ discover_scenarios = [
260
265
  single_csv_no_input_state_scenario_concurrent,
261
266
  ]
262
267
 
263
- read_scenarios = discover_scenarios + [
268
+ discover_scenarios = discover_failure_scenarios + discover_success_scenarios
269
+
270
+ read_scenarios = discover_success_scenarios + [
264
271
  emit_record_scenario_multi_stream,
265
272
  emit_record_scenario_single_stream,
266
273
  skip_record_scenario_multi_stream,
267
274
  skip_record_scenario_single_stream,
268
- multi_format_analytics_scenario,
275
+ csv_analytics_scenario,
269
276
  wait_for_rediscovery_scenario_multi_stream,
270
277
  wait_for_rediscovery_scenario_single_stream,
271
278
  ]
@@ -16,6 +16,7 @@ from airbyte_cdk.sources import AbstractSource
16
16
  from airbyte_cdk.sources.file_based.stream.concurrent.cursor import AbstractConcurrentFileBasedCursor
17
17
  from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput
18
18
  from airbyte_cdk.test.entrypoint_wrapper import read as entrypoint_read
19
+ from airbyte_cdk.utils import message_utils
19
20
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
20
21
  from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog
21
22
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
@@ -71,7 +72,7 @@ def assert_exception(expected_exception: type[BaseException], output: Entrypoint
71
72
 
72
73
 
73
74
  def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[AbstractSource]) -> None:
74
- records, log_messages = output.records_and_state_messages, output.logs
75
+ records_and_state_messages, log_messages = output.records_and_state_messages, output.logs
75
76
  logs = [message.log for message in log_messages if message.log.level.value in scenario.log_levels]
76
77
  if scenario.expected_records is None:
77
78
  return
@@ -85,7 +86,7 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac
85
86
  ),
86
87
  )
87
88
  sorted_records = sorted(
88
- filter(lambda r: r.record, records),
89
+ filter(lambda r: r.record, records_and_state_messages),
89
90
  key=lambda record: ",".join(
90
91
  f"{k}={v}" for k, v in sorted(record.record.data.items(), key=lambda items: (items[0], items[1])) if k != "emitted_at"
91
92
  ),
@@ -104,7 +105,9 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac
104
105
  assert actual.record.stream == expected["stream"]
105
106
 
106
107
  expected_states = list(filter(lambda e: "data" not in e, expected_records))
107
- states = list(filter(lambda r: r.state, records))
108
+ states = list(filter(lambda r: r.state, records_and_state_messages))
109
+ assert len(states) > 0, "No state messages emitted. Successful syncs should emit at least one stream state."
110
+ _verify_state_record_counts(sorted_records, states)
108
111
 
109
112
  if hasattr(scenario.source, "cursor_cls") and issubclass(scenario.source.cursor_cls, AbstractConcurrentFileBasedCursor):
110
113
  # Only check the last state emitted because we don't know the order the others will be in.
@@ -125,8 +128,34 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac
125
128
  _verify_analytics(analytics, scenario.expected_analytics)
126
129
 
127
130
 
131
+ def _verify_state_record_counts(records: List[AirbyteMessage], states: List[AirbyteMessage]) -> None:
132
+ actual_record_counts = {}
133
+ for record in records:
134
+ stream_descriptor = message_utils.get_stream_descriptor(record)
135
+ actual_record_counts[stream_descriptor] = actual_record_counts.get(stream_descriptor, 0) + 1
136
+
137
+ state_record_count_sums = {}
138
+ for state_message in states:
139
+ stream_descriptor = message_utils.get_stream_descriptor(state_message)
140
+ state_record_count_sums[stream_descriptor] = (
141
+ state_record_count_sums.get(stream_descriptor, 0)
142
+ + state_message.state.sourceStats.recordCount
143
+ )
144
+
145
+ for stream, actual_count in actual_record_counts.items():
146
+ assert state_record_count_sums.get(stream) == actual_count
147
+
148
+ # We can have extra keys in state_record_count_sums if we processed a stream and reported 0 records
149
+ extra_keys = state_record_count_sums.keys() - actual_record_counts.keys()
150
+ for stream in extra_keys:
151
+ assert state_record_count_sums[stream] == 0
152
+
153
+
128
154
  def _verify_analytics(analytics: List[AirbyteMessage], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]]) -> None:
129
155
  if expected_analytics:
156
+ assert len(analytics) == len(
157
+ expected_analytics), \
158
+ f"Number of actual analytics messages ({len(analytics)}) did not match expected ({len(expected_analytics)})"
130
159
  for actual, expected in zip(analytics, expected_analytics):
131
160
  actual_type, actual_value = actual.trace.analytics.type, actual.trace.analytics.value
132
161
  expected_type = expected.type
@@ -21,7 +21,7 @@ from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageReposi
21
21
  from airbyte_cdk.sources.source import TState
22
22
  from airbyte_cdk.sources.streams import Stream
23
23
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
24
- from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
24
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, FinalStateCursor
25
25
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import EpochValueConcurrentStreamStateConverter
26
26
  from airbyte_protocol.models import ConfiguredAirbyteStream
27
27
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
@@ -83,7 +83,7 @@ class StreamFacadeSource(ConcurrentSourceAdapter):
83
83
  None,
84
84
  )
85
85
  if self._cursor_field
86
- else NoopCursor(),
86
+ else FinalStateCursor(stream_name=stream.name, stream_namespace=stream.namespace, message_repository=self.message_repository),
87
87
  )
88
88
  for stream, state in zip(self._streams, stream_states)
89
89
  ]
@@ -4,7 +4,7 @@
4
4
  import logging
5
5
 
6
6
  from airbyte_cdk.sources.message import InMemoryMessageRepository
7
- from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
7
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
8
8
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
9
9
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
10
10
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
@@ -15,6 +15,8 @@ from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_str
15
15
  InMemoryPartitionGenerator,
16
16
  )
17
17
 
18
+ _message_repository = InMemoryMessageRepository()
19
+
18
20
  _id_only_stream = DefaultStream(
19
21
  partition_generator=InMemoryPartitionGenerator(
20
22
  [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
@@ -30,7 +32,7 @@ _id_only_stream = DefaultStream(
30
32
  primary_key=[],
31
33
  cursor_field=None,
32
34
  logger=logging.getLogger("test_logger"),
33
- cursor=NoopCursor(),
35
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
34
36
  )
35
37
 
36
38
  _id_only_stream_with_slice_logger = DefaultStream(
@@ -48,7 +50,7 @@ _id_only_stream_with_slice_logger = DefaultStream(
48
50
  primary_key=[],
49
51
  cursor_field=None,
50
52
  logger=logging.getLogger("test_logger"),
51
- cursor=NoopCursor(),
53
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
52
54
  )
53
55
 
54
56
  _id_only_stream_with_primary_key = DefaultStream(
@@ -66,7 +68,7 @@ _id_only_stream_with_primary_key = DefaultStream(
66
68
  primary_key=["id"],
67
69
  cursor_field=None,
68
70
  logger=logging.getLogger("test_logger"),
69
- cursor=NoopCursor(),
71
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
70
72
  )
71
73
 
72
74
  _id_only_stream_multiple_partitions = DefaultStream(
@@ -87,7 +89,7 @@ _id_only_stream_multiple_partitions = DefaultStream(
87
89
  primary_key=[],
88
90
  cursor_field=None,
89
91
  logger=logging.getLogger("test_logger"),
90
- cursor=NoopCursor(),
92
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
91
93
  )
92
94
 
93
95
  _id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
@@ -108,7 +110,7 @@ _id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
108
110
  primary_key=[],
109
111
  cursor_field=None,
110
112
  logger=logging.getLogger("test_logger"),
111
- cursor=NoopCursor(),
113
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
112
114
  )
113
115
 
114
116
  _stream_raising_exception = DefaultStream(
@@ -126,7 +128,7 @@ _stream_raising_exception = DefaultStream(
126
128
  primary_key=[],
127
129
  cursor_field=None,
128
130
  logger=logging.getLogger("test_logger"),
129
- cursor=NoopCursor(),
131
+ cursor=FinalStateCursor(stream_name="stream1", stream_namespace=None, message_repository=_message_repository),
130
132
  )
131
133
 
132
134
  test_concurrent_cdk_single_stream = (
@@ -140,7 +142,7 @@ test_concurrent_cdk_single_stream = (
140
142
  _id_only_stream,
141
143
  ]
142
144
  )
143
- .set_message_repository(InMemoryMessageRepository())
145
+ .set_message_repository(_message_repository)
144
146
  )
145
147
  .set_expected_records(
146
148
  [
@@ -193,7 +195,7 @@ test_concurrent_cdk_single_stream_with_primary_key = (
193
195
  _id_only_stream_with_primary_key,
194
196
  ]
195
197
  )
196
- .set_message_repository(InMemoryMessageRepository())
198
+ .set_message_repository(_message_repository)
197
199
  )
198
200
  .set_expected_records(
199
201
  [
@@ -253,11 +255,11 @@ test_concurrent_cdk_multiple_streams = (
253
255
  primary_key=[],
254
256
  cursor_field=None,
255
257
  logger=logging.getLogger("test_logger"),
256
- cursor=NoopCursor(),
258
+ cursor=FinalStateCursor(stream_name="stream2", stream_namespace=None, message_repository=_message_repository),
257
259
  ),
258
260
  ]
259
261
  )
260
- .set_message_repository(InMemoryMessageRepository())
262
+ .set_message_repository(_message_repository)
261
263
  )
262
264
  .set_expected_records(
263
265
  [
@@ -308,7 +310,7 @@ test_concurrent_cdk_partition_raises_exception = (
308
310
  _stream_raising_exception,
309
311
  ]
310
312
  )
311
- .set_message_repository(InMemoryMessageRepository())
313
+ .set_message_repository(_message_repository)
312
314
  )
313
315
  .set_expected_records(
314
316
  [
@@ -346,7 +348,7 @@ test_concurrent_cdk_single_stream_multiple_partitions = (
346
348
  _id_only_stream_multiple_partitions,
347
349
  ]
348
350
  )
349
- .set_message_repository(InMemoryMessageRepository())
351
+ .set_message_repository(_message_repository)
350
352
  )
351
353
  .set_expected_records(
352
354
  [
@@ -386,7 +388,7 @@ test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two = (
386
388
  _id_only_stream_multiple_partitions_concurrency_level_two,
387
389
  ]
388
390
  )
389
- .set_message_repository(InMemoryMessageRepository())
391
+ .set_message_repository(_message_repository)
390
392
  )
391
393
  .set_expected_records(
392
394
  [
@@ -8,11 +8,11 @@ from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
8
8
  from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
9
9
  from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
10
10
  from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
11
- from airbyte_cdk.sources.message import MessageRepository
11
+ from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
12
12
  from airbyte_cdk.sources.streams import Stream
13
13
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
15
- from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
16
16
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
17
17
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
18
18
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -42,13 +42,14 @@ class ConcurrentCdkSource(ConcurrentSourceAdapter):
42
42
  concurrent_source = ConcurrentSource.create(1, 1, streams[0]._logger, NeverLogSliceLogger(), message_repository)
43
43
  super().__init__(concurrent_source)
44
44
  self._streams = streams
45
+ self._message_repository = message_repository
45
46
 
46
47
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
47
48
  # Check is not verified because it is up to the source to implement this method
48
49
  return True, None
49
50
 
50
51
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
51
- return [StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger) for s in self._streams]
52
+ return [StreamFacade(s, LegacyStream(), FinalStateCursor(stream_name=s.name, stream_namespace=s.namespace, message_repository=self.message_repository), NeverLogSliceLogger(), s._logger) for s in self._streams]
52
53
 
53
54
  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
54
55
  return ConnectorSpecification(connectionSpecification={})
@@ -57,7 +58,7 @@ class ConcurrentCdkSource(ConcurrentSourceAdapter):
57
58
  return ConfiguredAirbyteCatalog(
58
59
  streams=[
59
60
  ConfiguredAirbyteStream(
60
- stream=StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
61
+ stream=StreamFacade(s, LegacyStream(), FinalStateCursor(stream_name=s.name, stream_namespace=s.namespace, message_repository=InMemoryMessageRepository()), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
61
62
  sync_mode=SyncMode.full_refresh,
62
63
  destination_sync_mode=DestinationSyncMode.overwrite,
63
64
  )
@@ -5,8 +5,9 @@ import unittest
5
5
  from unittest.mock import Mock
6
6
 
7
7
  from airbyte_cdk.models import AirbyteStream, SyncMode
8
+ from airbyte_cdk.sources.message import InMemoryMessageRepository
8
9
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE
9
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
10
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
10
11
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
11
12
 
12
13
 
@@ -20,6 +21,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
20
21
  self._cursor_field = None
21
22
  self._logger = Mock()
22
23
  self._cursor = Mock(spec=Cursor)
24
+ self._message_repository = InMemoryMessageRepository()
23
25
  self._stream = DefaultStream(
24
26
  self._partition_generator,
25
27
  self._name,
@@ -28,7 +30,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
28
30
  self._primary_key,
29
31
  self._cursor_field,
30
32
  self._logger,
31
- NoopCursor(),
33
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
32
34
  )
33
35
 
34
36
  def test_get_json_schema(self):
@@ -89,7 +91,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
89
91
  ["id"],
90
92
  self._cursor_field,
91
93
  self._logger,
92
- NoopCursor(),
94
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
93
95
  )
94
96
 
95
97
  expected_airbyte_stream = AirbyteStream(
@@ -121,7 +123,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
121
123
  ["id_a", "id_b"],
122
124
  self._cursor_field,
123
125
  self._logger,
124
- NoopCursor(),
126
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
125
127
  )
126
128
 
127
129
  expected_airbyte_stream = AirbyteStream(
@@ -153,7 +155,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
153
155
  self._primary_key,
154
156
  "date",
155
157
  self._logger,
156
- NoopCursor(),
158
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
157
159
  )
158
160
 
159
161
  expected_airbyte_stream = AirbyteStream(
@@ -178,7 +180,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
178
180
  self._primary_key,
179
181
  self._cursor_field,
180
182
  self._logger,
181
- NoopCursor(),
183
+ FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository),
182
184
  namespace="test",
183
185
  )
184
186
  expected_airbyte_stream = AirbyteStream(
@@ -26,7 +26,7 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
26
26
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
27
27
  from airbyte_cdk.sources.streams import Stream
28
28
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
29
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
29
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
30
30
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
31
31
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
32
32
  from airbyte_cdk.sources.streams.core import StreamData
@@ -105,8 +105,9 @@ def _stream(slice_to_partition_mapping, slice_logger, logger, message_repository
105
105
  return _MockStream(slice_to_partition_mapping)
106
106
 
107
107
 
108
- def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message_repository, cursor: Cursor = NoopCursor()):
108
+ def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message_repository, cursor: Optional[Cursor] = None):
109
109
  stream = _stream(slice_to_partition_mapping, slice_logger, logger, message_repository)
110
+ cursor = cursor or FinalStateCursor(stream_name=stream.name, stream_namespace=stream.namespace, message_repository=message_repository)
110
111
  source = Mock()
111
112
  source._slice_logger = slice_logger
112
113
  source.message_repository = message_repository