airbyte-cdk: airbyte_cdk-0.61.2-py3-none-any.whl → airbyte_cdk-0.62.1-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33)
  1. airbyte_cdk/sources/abstract_source.py +14 -33
  2. airbyte_cdk/sources/connector_state_manager.py +16 -4
  3. airbyte_cdk/sources/file_based/file_based_source.py +87 -35
  4. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +3 -0
  5. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +15 -13
  6. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -0
  7. airbyte_cdk/sources/file_based/stream/concurrent/{cursor.py → cursor/abstract_concurrent_file_based_cursor.py} +22 -44
  8. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +279 -0
  9. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_noop_cursor.py +56 -0
  10. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +11 -2
  11. airbyte_cdk/test/mock_http/mocker.py +3 -1
  12. airbyte_cdk/test/mock_http/response.py +9 -1
  13. airbyte_cdk/utils/traced_exception.py +1 -16
  14. {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/METADATA +1 -1
  15. {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/RECORD +33 -26
  16. unit_tests/sources/file_based/helpers.py +5 -0
  17. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +2860 -0
  18. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +11 -0
  19. unit_tests/sources/file_based/scenarios/scenario_builder.py +6 -2
  20. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  21. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +365 -0
  22. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +462 -0
  23. unit_tests/sources/file_based/test_file_based_scenarios.py +45 -0
  24. unit_tests/sources/file_based/test_scenarios.py +16 -8
  25. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +13 -2
  26. unit_tests/sources/test_abstract_source.py +36 -170
  27. unit_tests/sources/test_connector_state_manager.py +20 -13
  28. unit_tests/sources/test_integration_source.py +8 -25
  29. unit_tests/sources/test_source_read.py +1 -1
  30. unit_tests/test/mock_http/test_mocker.py +3 -1
  31. {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/LICENSE.txt +0 -0
  32. {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/WHEEL +0 -0
  33. {airbyte_cdk-0.61.2.dist-info → airbyte_cdk-0.62.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2860 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedConcurrentCursor
6
+ from airbyte_cdk.test.state_builder import StateBuilder
7
+ from unit_tests.sources.file_based.helpers import LowHistoryLimitConcurrentCursor
8
+ from unit_tests.sources.file_based.scenarios.file_based_source_builder import FileBasedSourceBuilder
9
+ from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
10
+
11
+ single_csv_input_state_is_earlier_scenario_concurrent = (
12
+ TestScenarioBuilder()
13
+ .set_name("single_csv_input_state_is_earlier_concurrent")
14
+ .set_config(
15
+ {
16
+ "streams": [
17
+ {
18
+ "name": "stream1",
19
+ "format": {"filetype": "csv"},
20
+ "globs": ["*.csv"],
21
+ "validation_policy": "Emit Record",
22
+ }
23
+ ]
24
+ }
25
+ )
26
+ .set_source_builder(
27
+ FileBasedSourceBuilder()
28
+ .set_files(
29
+ {
30
+ "a.csv": {
31
+ "contents": [
32
+ ("col1", "col2"),
33
+ ("val11", "val12"),
34
+ ("val21", "val22"),
35
+ ],
36
+ "last_modified": "2023-06-05T03:54:07.000Z",
37
+ }
38
+ }
39
+ )
40
+ .set_file_type("csv")
41
+ .set_cursor_cls(FileBasedConcurrentCursor)
42
+ )
43
+ .set_incremental_scenario_config(
44
+ IncrementalScenarioConfig(
45
+ input_state=StateBuilder().with_stream_state("stream1", {
46
+ "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z"},
47
+ "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_some_old_file.csv"
48
+ }).build(),
49
+ )
50
+ )
51
+ .set_expected_records(
52
+ [
53
+ {
54
+ "data": {
55
+ "col1": "val11",
56
+ "col2": "val12",
57
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
58
+ "_ab_source_file_url": "a.csv",
59
+ },
60
+ "stream": "stream1",
61
+ },
62
+ {
63
+ "data": {
64
+ "col1": "val21",
65
+ "col2": "val22",
66
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
67
+ "_ab_source_file_url": "a.csv",
68
+ },
69
+ "stream": "stream1",
70
+ },
71
+ {
72
+ "stream1": {
73
+ "history": {"some_old_file.csv": "2023-06-01T03:54:07.000000Z", "a.csv": "2023-06-05T03:54:07.000000Z"},
74
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
75
+ }
76
+ },
77
+ ]
78
+ )
79
+ .set_expected_catalog(
80
+ {
81
+ "streams": [
82
+ {
83
+ "default_cursor_field": ["_ab_source_file_last_modified"],
84
+ "source_defined_cursor": True,
85
+ "supported_sync_modes": ["full_refresh", "incremental"],
86
+ "json_schema": {
87
+ "type": "object",
88
+ "properties": {
89
+ "col1": {
90
+ "type": ["null", "string"],
91
+ },
92
+ "col2": {
93
+ "type": ["null", "string"],
94
+ },
95
+ "_ab_source_file_last_modified": {"type": "string"},
96
+ "_ab_source_file_url": {"type": "string"},
97
+ },
98
+ },
99
+ "name": "stream1",
100
+ }
101
+ ]
102
+ }
103
+ )
104
+ ).build()
105
+
106
+ single_csv_file_is_skipped_if_same_modified_at_as_in_history_concurrent = (
107
+ TestScenarioBuilder()
108
+ .set_name("single_csv_file_is_skipped_if_same_modified_at_as_in_history_concurrent")
109
+ .set_config(
110
+ {
111
+ "streams": [
112
+ {
113
+ "name": "stream1",
114
+ "format": {"filetype": "csv"},
115
+ "globs": ["*.csv"],
116
+ "validation_policy": "Emit Record",
117
+ }
118
+ ]
119
+ }
120
+ )
121
+ .set_source_builder(
122
+ FileBasedSourceBuilder()
123
+ .set_files(
124
+ {
125
+ "a.csv": {
126
+ "contents": [
127
+ ("col1", "col2"),
128
+ ("val11", "val12"),
129
+ ("val21", "val22"),
130
+ ],
131
+ "last_modified": "2023-06-05T03:54:07.000Z",
132
+ }
133
+ }
134
+ )
135
+ .set_file_type("csv")
136
+ .set_cursor_cls(FileBasedConcurrentCursor)
137
+ )
138
+ .set_incremental_scenario_config(
139
+ IncrementalScenarioConfig(
140
+ input_state=StateBuilder().with_stream_state("stream1", {
141
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
142
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
143
+ }).build(),
144
+ )
145
+ )
146
+ .set_expected_records(
147
+ [
148
+ {
149
+ "stream1": {
150
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
151
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
152
+ }
153
+ }
154
+ ]
155
+ )
156
+ .set_expected_catalog(
157
+ {
158
+ "streams": [
159
+ {
160
+ "default_cursor_field": ["_ab_source_file_last_modified"],
161
+ "source_defined_cursor": True,
162
+ "supported_sync_modes": ["full_refresh", "incremental"],
163
+ "json_schema": {
164
+ "type": "object",
165
+ "properties": {
166
+ "col1": {
167
+ "type": ["null", "string"],
168
+ },
169
+ "col2": {
170
+ "type": ["null", "string"],
171
+ },
172
+ "_ab_source_file_last_modified": {"type": "string"},
173
+ "_ab_source_file_url": {"type": "string"},
174
+ },
175
+ },
176
+ "name": "stream1",
177
+ }
178
+ ]
179
+ }
180
+ )
181
+ ).build()
182
+
183
+ single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history_concurrent = (
184
+ TestScenarioBuilder()
185
+ .set_name("single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history_concurrent")
186
+ .set_config(
187
+ {
188
+ "streams": [
189
+ {
190
+ "name": "stream1",
191
+ "format": {"filetype": "csv"},
192
+ "globs": ["*.csv"],
193
+ "validation_policy": "Emit Record",
194
+ }
195
+ ]
196
+ }
197
+ )
198
+ .set_source_builder(
199
+ FileBasedSourceBuilder()
200
+ .set_files(
201
+ {
202
+ "a.csv": {
203
+ "contents": [
204
+ ("col1", "col2"),
205
+ ("val11", "val12"),
206
+ ("val21", "val22"),
207
+ ],
208
+ "last_modified": "2023-06-05T03:54:07.000Z",
209
+ }
210
+ }
211
+ )
212
+ .set_file_type("csv")
213
+ .set_cursor_cls(FileBasedConcurrentCursor)
214
+ )
215
+ .set_incremental_scenario_config(
216
+ IncrementalScenarioConfig(
217
+ input_state=StateBuilder().with_stream_state("stream1", {
218
+ "history": {"a.csv": "2023-06-01T03:54:07.000000Z"},
219
+ "_ab_source_file_last_modified": "2023-06-01T03:54:07.000000Z_a.csv",
220
+ }).build(),
221
+ )
222
+ )
223
+ .set_expected_records(
224
+ [
225
+ {
226
+ "data": {
227
+ "col1": "val11",
228
+ "col2": "val12",
229
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
230
+ "_ab_source_file_url": "a.csv",
231
+ },
232
+ "stream": "stream1",
233
+ },
234
+ {
235
+ "data": {
236
+ "col1": "val21",
237
+ "col2": "val22",
238
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
239
+ "_ab_source_file_url": "a.csv",
240
+ },
241
+ "stream": "stream1",
242
+ },
243
+ {
244
+ "stream1": {
245
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
246
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
247
+ }
248
+ },
249
+ ]
250
+ )
251
+ .set_expected_catalog(
252
+ {
253
+ "streams": [
254
+ {
255
+ "default_cursor_field": ["_ab_source_file_last_modified"],
256
+ "source_defined_cursor": True,
257
+ "supported_sync_modes": ["full_refresh", "incremental"],
258
+ "json_schema": {
259
+ "type": "object",
260
+ "properties": {
261
+ "col1": {
262
+ "type": ["null", "string"],
263
+ },
264
+ "col2": {
265
+ "type": ["null", "string"],
266
+ },
267
+ "_ab_source_file_last_modified": {"type": "string"},
268
+ "_ab_source_file_url": {"type": "string"},
269
+ },
270
+ },
271
+ "name": "stream1",
272
+ }
273
+ ]
274
+ }
275
+ )
276
+ ).build()
277
+
278
+ single_csv_no_input_state_scenario_concurrent = (
279
+ TestScenarioBuilder()
280
+ .set_name("single_csv_input_state_is_earlier_again_concurrent")
281
+ .set_config(
282
+ {
283
+ "streams": [
284
+ {
285
+ "name": "stream1",
286
+ "format": {"filetype": "csv"},
287
+ "globs": ["*.csv"],
288
+ "validation_policy": "Emit Record",
289
+ }
290
+ ]
291
+ }
292
+ )
293
+ .set_source_builder(
294
+ FileBasedSourceBuilder()
295
+ .set_files(
296
+ {
297
+ "a.csv": {
298
+ "contents": [
299
+ ("col1", "col2"),
300
+ ("val11", "val12"),
301
+ ("val21", "val22"),
302
+ ],
303
+ "last_modified": "2023-06-05T03:54:07.000000Z",
304
+ }
305
+ }
306
+ )
307
+ .set_file_type("csv")
308
+ .set_cursor_cls(FileBasedConcurrentCursor)
309
+ )
310
+ .set_expected_catalog(
311
+ {
312
+ "streams": [
313
+ {
314
+ "default_cursor_field": ["_ab_source_file_last_modified"],
315
+ "source_defined_cursor": True,
316
+ "supported_sync_modes": ["full_refresh", "incremental"],
317
+ "json_schema": {
318
+ "type": "object",
319
+ "properties": {
320
+ "col1": {
321
+ "type": ["null", "string"],
322
+ },
323
+ "col2": {
324
+ "type": ["null", "string"],
325
+ },
326
+ "_ab_source_file_last_modified": {"type": "string"},
327
+ "_ab_source_file_url": {"type": "string"},
328
+ },
329
+ },
330
+ "name": "stream1",
331
+ }
332
+ ]
333
+ }
334
+ )
335
+ .set_expected_records(
336
+ [
337
+ {
338
+ "data": {
339
+ "col1": "val11",
340
+ "col2": "val12",
341
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
342
+ "_ab_source_file_url": "a.csv",
343
+ },
344
+ "stream": "stream1",
345
+ },
346
+ {
347
+ "data": {
348
+ "col1": "val21",
349
+ "col2": "val22",
350
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
351
+ "_ab_source_file_url": "a.csv",
352
+ },
353
+ "stream": "stream1",
354
+ },
355
+ {
356
+ "stream1": {
357
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
358
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
359
+ }
360
+ },
361
+ ]
362
+ )
363
+ .set_incremental_scenario_config(
364
+ IncrementalScenarioConfig(
365
+ input_state=[],
366
+ )
367
+ )
368
+ ).build()
369
+
370
+ multi_csv_same_timestamp_scenario_concurrent = (
371
+ TestScenarioBuilder()
372
+ .set_name("multi_csv_same_timestamp_concurrent")
373
+ .set_config(
374
+ {
375
+ "streams": [
376
+ {
377
+ "name": "stream1",
378
+ "format": {"filetype": "csv"},
379
+ "globs": ["*.csv"],
380
+ "validation_policy": "Emit Record",
381
+ }
382
+ ]
383
+ }
384
+ )
385
+ .set_source_builder(
386
+ FileBasedSourceBuilder()
387
+ .set_files(
388
+ {
389
+ "a.csv": {
390
+ "contents": [
391
+ ("col1", "col2"),
392
+ ("val11a", "val12a"),
393
+ ("val21a", "val22a"),
394
+ ],
395
+ "last_modified": "2023-06-05T03:54:07.000000Z",
396
+ },
397
+ "b.csv": {
398
+ "contents": [
399
+ ("col1", "col2", "col3"),
400
+ ("val11b", "val12b", "val13b"),
401
+ ("val21b", "val22b", "val23b"),
402
+ ],
403
+ "last_modified": "2023-06-05T03:54:07.000000Z",
404
+ },
405
+ }
406
+ )
407
+ .set_file_type("csv")
408
+ .set_cursor_cls(FileBasedConcurrentCursor)
409
+ )
410
+ .set_expected_catalog(
411
+ {
412
+ "streams": [
413
+ {
414
+ "default_cursor_field": ["_ab_source_file_last_modified"],
415
+ "json_schema": {
416
+ "type": "object",
417
+ "properties": {
418
+ "col1": {
419
+ "type": ["null", "string"],
420
+ },
421
+ "col2": {
422
+ "type": ["null", "string"],
423
+ },
424
+ "col3": {
425
+ "type": ["null", "string"],
426
+ },
427
+ "_ab_source_file_last_modified": {"type": "string"},
428
+ "_ab_source_file_url": {"type": "string"},
429
+ },
430
+ },
431
+ "name": "stream1",
432
+ "source_defined_cursor": True,
433
+ "supported_sync_modes": ["full_refresh", "incremental"],
434
+ }
435
+ ]
436
+ }
437
+ )
438
+ .set_expected_records(
439
+ [
440
+ {
441
+ "data": {
442
+ "col1": "val11a",
443
+ "col2": "val12a",
444
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
445
+ "_ab_source_file_url": "a.csv",
446
+ },
447
+ "stream": "stream1",
448
+ },
449
+ {
450
+ "data": {
451
+ "col1": "val21a",
452
+ "col2": "val22a",
453
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
454
+ "_ab_source_file_url": "a.csv",
455
+ },
456
+ "stream": "stream1",
457
+ },
458
+ {
459
+ "data": {
460
+ "col1": "val11b",
461
+ "col2": "val12b",
462
+ "col3": "val13b",
463
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
464
+ "_ab_source_file_url": "b.csv",
465
+ },
466
+ "stream": "stream1",
467
+ },
468
+ {
469
+ "data": {
470
+ "col1": "val21b",
471
+ "col2": "val22b",
472
+ "col3": "val23b",
473
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
474
+ "_ab_source_file_url": "b.csv",
475
+ },
476
+ "stream": "stream1",
477
+ },
478
+ {
479
+ "stream1": {
480
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
481
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
482
+ }
483
+ },
484
+ ]
485
+ )
486
+ .set_incremental_scenario_config(
487
+ IncrementalScenarioConfig(
488
+ input_state=[],
489
+ )
490
+ )
491
+ ).build()
492
+
493
+ single_csv_input_state_is_later_scenario_concurrent = (
494
+ TestScenarioBuilder()
495
+ .set_name("single_csv_input_state_is_later_concurrent")
496
+ .set_config(
497
+ {
498
+ "streams": [
499
+ {
500
+ "name": "stream1",
501
+ "format": {"filetype": "csv"},
502
+ "globs": ["*.csv"],
503
+ "validation_policy": "Emit Record",
504
+ }
505
+ ]
506
+ }
507
+ )
508
+ .set_source_builder(
509
+ FileBasedSourceBuilder()
510
+ .set_files(
511
+ {
512
+ "a.csv": {
513
+ "contents": [
514
+ ("col1", "col2"),
515
+ ("val11", "val12"),
516
+ ("val21", "val22"),
517
+ ],
518
+ "last_modified": "2023-06-05T03:54:07.000000Z",
519
+ }
520
+ }
521
+ )
522
+ .set_file_type("csv")
523
+ .set_cursor_cls(FileBasedConcurrentCursor)
524
+ )
525
+ .set_expected_catalog(
526
+ {
527
+ "streams": [
528
+ {
529
+ "default_cursor_field": ["_ab_source_file_last_modified"],
530
+ "source_defined_cursor": True,
531
+ "supported_sync_modes": ["full_refresh", "incremental"],
532
+ "json_schema": {
533
+ "type": "object",
534
+ "properties": {
535
+ "col1": {
536
+ "type": ["null", "string"],
537
+ },
538
+ "col2": {
539
+ "type": ["null", "string"],
540
+ },
541
+ "_ab_source_file_last_modified": {"type": "string"},
542
+ "_ab_source_file_url": {"type": "string"},
543
+ },
544
+ },
545
+ "name": "stream1",
546
+ }
547
+ ]
548
+ }
549
+ )
550
+ .set_expected_records(
551
+ [
552
+ {
553
+ "data": {
554
+ "col1": "val11",
555
+ "col2": "val12",
556
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
557
+ "_ab_source_file_url": "a.csv",
558
+ },
559
+ "stream": "stream1",
560
+ },
561
+ {
562
+ "data": {
563
+ "col1": "val21",
564
+ "col2": "val22",
565
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
566
+ "_ab_source_file_url": "a.csv",
567
+ },
568
+ "stream": "stream1",
569
+ },
570
+ {
571
+ "stream1": {
572
+ "history": {
573
+ "recent_file.csv": "2023-07-15T23:59:59.000000Z",
574
+ "a.csv": "2023-06-05T03:54:07.000000Z",
575
+ },
576
+ "_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv",
577
+ }
578
+ },
579
+ ]
580
+ )
581
+ .set_incremental_scenario_config(
582
+ IncrementalScenarioConfig(
583
+ input_state=StateBuilder().with_stream_state(
584
+ "stream1", {
585
+ "history": {"recent_file.csv": "2023-07-15T23:59:59.000000Z"},
586
+ "_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv",
587
+ },
588
+ ).build(),
589
+ )
590
+ )
591
+ ).build()
592
+
593
+ multi_csv_different_timestamps_scenario_concurrent = (
594
+ TestScenarioBuilder()
595
+ .set_name("multi_csv_stream_different_timestamps_concurrent")
596
+ .set_config(
597
+ {
598
+ "streams": [
599
+ {
600
+ "name": "stream1",
601
+ "format": {"filetype": "csv"},
602
+ "globs": ["*.csv"],
603
+ "validation_policy": "Emit Record",
604
+ }
605
+ ]
606
+ }
607
+ )
608
+ .set_source_builder(
609
+ FileBasedSourceBuilder()
610
+ .set_files(
611
+ {
612
+ "a.csv": {
613
+ "contents": [
614
+ ("col1", "col2"),
615
+ ("val11a", "val12a"),
616
+ ("val21a", "val22a"),
617
+ ],
618
+ "last_modified": "2023-06-04T03:54:07.000000Z",
619
+ },
620
+ "b.csv": {
621
+ "contents": [
622
+ ("col1", "col2", "col3"),
623
+ ("val11b", "val12b", "val13b"),
624
+ ("val21b", "val22b", "val23b"),
625
+ ],
626
+ "last_modified": "2023-06-05T03:54:07.000000Z",
627
+ },
628
+ }
629
+ )
630
+ .set_file_type("csv")
631
+ .set_cursor_cls(FileBasedConcurrentCursor)
632
+ )
633
+ .set_expected_catalog(
634
+ {
635
+ "streams": [
636
+ {
637
+ "default_cursor_field": ["_ab_source_file_last_modified"],
638
+ "json_schema": {
639
+ "type": "object",
640
+ "properties": {
641
+ "col1": {
642
+ "type": ["null", "string"],
643
+ },
644
+ "col2": {
645
+ "type": ["null", "string"],
646
+ },
647
+ "col3": {
648
+ "type": ["null", "string"],
649
+ },
650
+ "_ab_source_file_last_modified": {"type": "string"},
651
+ "_ab_source_file_url": {"type": "string"},
652
+ },
653
+ },
654
+ "name": "stream1",
655
+ "source_defined_cursor": True,
656
+ "supported_sync_modes": ["full_refresh", "incremental"],
657
+ }
658
+ ]
659
+ }
660
+ )
661
+ .set_expected_records(
662
+ [
663
+ {
664
+ "data": {
665
+ "col1": "val11a",
666
+ "col2": "val12a",
667
+ "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z",
668
+ "_ab_source_file_url": "a.csv",
669
+ },
670
+ "stream": "stream1",
671
+ },
672
+ {
673
+ "data": {
674
+ "col1": "val21a",
675
+ "col2": "val22a",
676
+ "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z",
677
+ "_ab_source_file_url": "a.csv",
678
+ },
679
+ "stream": "stream1",
680
+ },
681
+ {
682
+ "stream1": {
683
+ "history": {
684
+ "a.csv": "2023-06-04T03:54:07.000000Z",
685
+ },
686
+ "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z_a.csv",
687
+ }
688
+ },
689
+ {
690
+ "data": {
691
+ "col1": "val11b",
692
+ "col2": "val12b",
693
+ "col3": "val13b",
694
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
695
+ "_ab_source_file_url": "b.csv",
696
+ },
697
+ "stream": "stream1",
698
+ },
699
+ {
700
+ "data": {
701
+ "col1": "val21b",
702
+ "col2": "val22b",
703
+ "col3": "val23b",
704
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
705
+ "_ab_source_file_url": "b.csv",
706
+ },
707
+ "stream": "stream1",
708
+ },
709
+ {
710
+ "stream1": {
711
+ "history": {"a.csv": "2023-06-04T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
712
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
713
+ }
714
+ },
715
+ ]
716
+ )
717
+ .set_incremental_scenario_config(
718
+ IncrementalScenarioConfig(
719
+ input_state=[],
720
+ )
721
+ )
722
+ ).build()
723
+
724
+ multi_csv_per_timestamp_scenario_concurrent = (
725
+ TestScenarioBuilder()
726
+ .set_name("multi_csv_per_timestamp_concurrent")
727
+ .set_config(
728
+ {
729
+ "streams": [
730
+ {
731
+ "name": "stream1",
732
+ "format": {"filetype": "csv"},
733
+ "globs": ["*.csv"],
734
+ "validation_policy": "Emit Record",
735
+ }
736
+ ]
737
+ }
738
+ )
739
+ .set_source_builder(
740
+ FileBasedSourceBuilder()
741
+ .set_files(
742
+ {
743
+ "a.csv": {
744
+ "contents": [
745
+ ("col1", "col2"),
746
+ ("val11a", "val12a"),
747
+ ("val21a", "val22a"),
748
+ ],
749
+ "last_modified": "2023-06-05T03:54:07.000000Z",
750
+ },
751
+ "b.csv": {
752
+ "contents": [
753
+ ("col1", "col2", "col3"),
754
+ ("val11b", "val12b", "val13b"),
755
+ ("val21b", "val22b", "val23b"),
756
+ ],
757
+ "last_modified": "2023-06-05T03:54:07.000000Z",
758
+ },
759
+ "c.csv": {
760
+ "contents": [
761
+ ("col1", "col2", "col3"),
762
+ ("val11c", "val12c", "val13c"),
763
+ ("val21c", "val22c", "val23c"),
764
+ ],
765
+ "last_modified": "2023-06-06T03:54:07.000000Z",
766
+ },
767
+ }
768
+ )
769
+ .set_file_type("csv")
770
+ .set_cursor_cls(FileBasedConcurrentCursor)
771
+ )
772
+ .set_expected_catalog(
773
+ {
774
+ "streams": [
775
+ {
776
+ "default_cursor_field": ["_ab_source_file_last_modified"],
777
+ "json_schema": {
778
+ "type": "object",
779
+ "properties": {
780
+ "col1": {
781
+ "type": ["null", "string"],
782
+ },
783
+ "col2": {
784
+ "type": ["null", "string"],
785
+ },
786
+ "col3": {
787
+ "type": ["null", "string"],
788
+ },
789
+ "_ab_source_file_last_modified": {"type": "string"},
790
+ "_ab_source_file_url": {"type": "string"},
791
+ },
792
+ },
793
+ "name": "stream1",
794
+ "source_defined_cursor": True,
795
+ "supported_sync_modes": ["full_refresh", "incremental"],
796
+ }
797
+ ]
798
+ }
799
+ )
800
+ .set_expected_records(
801
+ [
802
+ {
803
+ "data": {
804
+ "col1": "val11a",
805
+ "col2": "val12a",
806
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
807
+ "_ab_source_file_url": "a.csv",
808
+ },
809
+ "stream": "stream1",
810
+ },
811
+ {
812
+ "data": {
813
+ "col1": "val21a",
814
+ "col2": "val22a",
815
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
816
+ "_ab_source_file_url": "a.csv",
817
+ },
818
+ "stream": "stream1",
819
+ },
820
+ {
821
+ "data": {
822
+ "col1": "val11b",
823
+ "col2": "val12b",
824
+ "col3": "val13b",
825
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
826
+ "_ab_source_file_url": "b.csv",
827
+ },
828
+ "stream": "stream1",
829
+ },
830
+ {
831
+ "data": {
832
+ "col1": "val21b",
833
+ "col2": "val22b",
834
+ "col3": "val23b",
835
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
836
+ "_ab_source_file_url": "b.csv",
837
+ },
838
+ "stream": "stream1",
839
+ },
840
+ {
841
+ "stream1": {
842
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
843
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
844
+ }
845
+ },
846
+ {
847
+ "data": {
848
+ "col1": "val11c",
849
+ "col2": "val12c",
850
+ "col3": "val13c",
851
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
852
+ "_ab_source_file_url": "c.csv",
853
+ },
854
+ "stream": "stream1",
855
+ },
856
+ {
857
+ "data": {
858
+ "col1": "val21c",
859
+ "col2": "val22c",
860
+ "col3": "val23c",
861
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
862
+ "_ab_source_file_url": "c.csv",
863
+ },
864
+ "stream": "stream1",
865
+ },
866
+ {
867
+ "stream1": {
868
+ "history": {
869
+ "a.csv": "2023-06-05T03:54:07.000000Z",
870
+ "b.csv": "2023-06-05T03:54:07.000000Z",
871
+ "c.csv": "2023-06-06T03:54:07.000000Z",
872
+ },
873
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
874
+ }
875
+ },
876
+ ]
877
+ )
878
+ .set_incremental_scenario_config(
879
+ IncrementalScenarioConfig(
880
+ input_state=[],
881
+ )
882
+ )
883
+ ).build()
884
+
885
+ multi_csv_skip_file_if_already_in_history_concurrent = (
886
+ TestScenarioBuilder()
887
+ .set_name("skip_files_already_in_history_concurrent")
888
+ .set_config(
889
+ {
890
+ "streams": [
891
+ {
892
+ "name": "stream1",
893
+ "format": {"filetype": "csv"},
894
+ "globs": ["*.csv"],
895
+ "validation_policy": "Emit Record",
896
+ }
897
+ ]
898
+ }
899
+ )
900
+ .set_source_builder(
901
+ FileBasedSourceBuilder()
902
+ .set_files(
903
+ {
904
+ "a.csv": {
905
+ "contents": [
906
+ ("col1", "col2"),
907
+ ("val11a", "val12a"),
908
+ ("val21a", "val22a"),
909
+ ],
910
+ "last_modified": "2023-06-05T03:54:07.000000Z",
911
+ },
912
+ "b.csv": {
913
+ "contents": [
914
+ ("col1", "col2", "col3"),
915
+ ("val11b", "val12b", "val13b"),
916
+ ("val21b", "val22b", "val23b"),
917
+ ],
918
+ "last_modified": "2023-06-05T03:54:07.000000Z",
919
+ },
920
+ "c.csv": {
921
+ "contents": [
922
+ ("col1", "col2", "col3"),
923
+ ("val11c", "val12c", "val13c"),
924
+ ("val21c", "val22c", "val23c"),
925
+ ],
926
+ "last_modified": "2023-06-06T03:54:07.000000Z",
927
+ },
928
+ }
929
+ )
930
+ .set_file_type("csv")
931
+ .set_cursor_cls(FileBasedConcurrentCursor)
932
+ )
933
+ .set_expected_catalog(
934
+ {
935
+ "streams": [
936
+ {
937
+ "default_cursor_field": ["_ab_source_file_last_modified"],
938
+ "json_schema": {
939
+ "type": "object",
940
+ "properties": {
941
+ "col1": {
942
+ "type": ["null", "string"],
943
+ },
944
+ "col2": {
945
+ "type": ["null", "string"],
946
+ },
947
+ "col3": {
948
+ "type": ["null", "string"],
949
+ },
950
+ "_ab_source_file_last_modified": {"type": "string"},
951
+ "_ab_source_file_url": {"type": "string"},
952
+ },
953
+ },
954
+ "name": "stream1",
955
+ "source_defined_cursor": True,
956
+ "supported_sync_modes": ["full_refresh", "incremental"],
957
+ }
958
+ ]
959
+ }
960
+ )
961
+ .set_expected_records(
962
+ [
963
+ # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
964
+ # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
965
+ {
966
+ "data": {
967
+ "col1": "val11b",
968
+ "col2": "val12b",
969
+ "col3": "val13b",
970
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
971
+ "_ab_source_file_url": "b.csv",
972
+ },
973
+ "stream": "stream1",
974
+ },
975
+ {
976
+ "data": {
977
+ "col1": "val21b",
978
+ "col2": "val22b",
979
+ "col3": "val23b",
980
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
981
+ "_ab_source_file_url": "b.csv",
982
+ },
983
+ "stream": "stream1",
984
+ },
985
+ {
986
+ "stream1": {
987
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "b.csv": "2023-06-05T03:54:07.000000Z"},
988
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
989
+ }
990
+ },
991
+ {
992
+ "data": {
993
+ "col1": "val11c",
994
+ "col2": "val12c",
995
+ "col3": "val13c",
996
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
997
+ "_ab_source_file_url": "c.csv",
998
+ },
999
+ "stream": "stream1",
1000
+ },
1001
+ {
1002
+ "data": {
1003
+ "col1": "val21c",
1004
+ "col2": "val22c",
1005
+ "col3": "val23c",
1006
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1007
+ "_ab_source_file_url": "c.csv",
1008
+ },
1009
+ "stream": "stream1",
1010
+ },
1011
+ {
1012
+ "stream1": {
1013
+ "history": {
1014
+ "a.csv": "2023-06-05T03:54:07.000000Z",
1015
+ "b.csv": "2023-06-05T03:54:07.000000Z",
1016
+ "c.csv": "2023-06-06T03:54:07.000000Z",
1017
+ },
1018
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1019
+ }
1020
+ },
1021
+ ]
1022
+ )
1023
+ .set_incremental_scenario_config(
1024
+ IncrementalScenarioConfig(
1025
+ input_state=StateBuilder().with_stream_state(
1026
+ "stream1",
1027
+ {"history": {"a.csv": "2023-06-05T03:54:07.000000Z"},
1028
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv"}).build(),
1029
+ )
1030
+ )
1031
+ ).build()
1032
+
1033
+ multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_newer = (
1034
+ TestScenarioBuilder()
1035
+ .set_name("multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_newer")
1036
+ .set_config(
1037
+ {
1038
+ "streams": [
1039
+ {
1040
+ "name": "stream1",
1041
+ "format": {"filetype": "csv"},
1042
+ "globs": ["*.csv"],
1043
+ "validation_policy": "Emit Record",
1044
+ }
1045
+ ]
1046
+ }
1047
+ )
1048
+ .set_source_builder(
1049
+ FileBasedSourceBuilder()
1050
+ .set_files(
1051
+ {
1052
+ "a.csv": {
1053
+ "contents": [
1054
+ ("col1", "col2"),
1055
+ ("val11a", "val12a"),
1056
+ ("val21a", "val22a"),
1057
+ ],
1058
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1059
+ },
1060
+ "b.csv": {
1061
+ "contents": [
1062
+ ("col1", "col2", "col3"),
1063
+ ("val11b", "val12b", "val13b"),
1064
+ ("val21b", "val22b", "val23b"),
1065
+ ],
1066
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1067
+ },
1068
+ "c.csv": {
1069
+ "contents": [
1070
+ ("col1", "col2", "col3"),
1071
+ ("val11c", "val12c", "val13c"),
1072
+ ("val21c", "val22c", "val23c"),
1073
+ ],
1074
+ "last_modified": "2023-06-06T03:54:07.000000Z",
1075
+ },
1076
+ }
1077
+ )
1078
+ .set_file_type("csv")
1079
+ .set_cursor_cls(FileBasedConcurrentCursor)
1080
+ )
1081
+ .set_expected_catalog(
1082
+ {
1083
+ "streams": [
1084
+ {
1085
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1086
+ "json_schema": {
1087
+ "type": "object",
1088
+ "properties": {
1089
+ "col1": {
1090
+ "type": ["null", "string"],
1091
+ },
1092
+ "col2": {
1093
+ "type": ["null", "string"],
1094
+ },
1095
+ "col3": {
1096
+ "type": ["null", "string"],
1097
+ },
1098
+ "_ab_source_file_last_modified": {"type": "string"},
1099
+ "_ab_source_file_url": {"type": "string"},
1100
+ },
1101
+ },
1102
+ "name": "stream1",
1103
+ "source_defined_cursor": True,
1104
+ "supported_sync_modes": ["full_refresh", "incremental"],
1105
+ }
1106
+ ]
1107
+ }
1108
+ )
1109
+ .set_expected_records(
1110
+ [
1111
+ # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
1112
+ # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
1113
+ {
1114
+ "data": {
1115
+ "col1": "val11b",
1116
+ "col2": "val12b",
1117
+ "col3": "val13b",
1118
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1119
+ "_ab_source_file_url": "b.csv",
1120
+ },
1121
+ "stream": "stream1",
1122
+ },
1123
+ {
1124
+ "data": {
1125
+ "col1": "val21b",
1126
+ "col2": "val22b",
1127
+ "col3": "val23b",
1128
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1129
+ "_ab_source_file_url": "b.csv",
1130
+ },
1131
+ "stream": "stream1",
1132
+ },
1133
+ # {"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c"}, "stream": "stream1"}, # this file is skipped
1134
+ # {"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c"}, "stream": "stream1"}, # this file is skipped
1135
+ {
1136
+ "stream1": {
1137
+ "history": {
1138
+ "a.csv": "2023-06-05T03:54:07.000000Z",
1139
+ "b.csv": "2023-06-05T03:54:07.000000Z",
1140
+ "c.csv": "2023-06-06T03:54:07.000000Z",
1141
+ },
1142
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1143
+ }
1144
+ },
1145
+ ]
1146
+ )
1147
+ .set_incremental_scenario_config(
1148
+ IncrementalScenarioConfig(
1149
+ input_state=StateBuilder().with_stream_state(
1150
+ "stream1",
1151
+ {
1152
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"},
1153
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1154
+ }
1155
+ ).build(),
1156
+ )
1157
+ )
1158
+ ).build()
1159
+
1160
+ multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_older = (
1161
+ TestScenarioBuilder()
1162
+ .set_name("multi_csv_include_missing_files_within_history_range_concurrent_cursor_is_older")
1163
+ .set_config(
1164
+ {
1165
+ "streams": [
1166
+ {
1167
+ "name": "stream1",
1168
+ "format": {"filetype": "csv"},
1169
+ "globs": ["*.csv"],
1170
+ "validation_policy": "Emit Record",
1171
+ }
1172
+ ]
1173
+ }
1174
+ )
1175
+ .set_source_builder(
1176
+ FileBasedSourceBuilder()
1177
+ .set_files(
1178
+ {
1179
+ "a.csv": {
1180
+ "contents": [
1181
+ ("col1", "col2"),
1182
+ ("val11a", "val12a"),
1183
+ ("val21a", "val22a"),
1184
+ ],
1185
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1186
+ },
1187
+ "b.csv": {
1188
+ "contents": [
1189
+ ("col1", "col2", "col3"),
1190
+ ("val11b", "val12b", "val13b"),
1191
+ ("val21b", "val22b", "val23b"),
1192
+ ],
1193
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1194
+ },
1195
+ "c.csv": {
1196
+ "contents": [
1197
+ ("col1", "col2", "col3"),
1198
+ ("val11c", "val12c", "val13c"),
1199
+ ("val21c", "val22c", "val23c"),
1200
+ ],
1201
+ "last_modified": "2023-06-06T03:54:07.000000Z",
1202
+ },
1203
+ }
1204
+ )
1205
+ .set_file_type("csv")
1206
+ .set_cursor_cls(FileBasedConcurrentCursor)
1207
+ )
1208
+ .set_expected_catalog(
1209
+ {
1210
+ "streams": [
1211
+ {
1212
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1213
+ "json_schema": {
1214
+ "type": "object",
1215
+ "properties": {
1216
+ "col1": {
1217
+ "type": ["null", "string"],
1218
+ },
1219
+ "col2": {
1220
+ "type": ["null", "string"],
1221
+ },
1222
+ "col3": {
1223
+ "type": ["null", "string"],
1224
+ },
1225
+ "_ab_source_file_last_modified": {"type": "string"},
1226
+ "_ab_source_file_url": {"type": "string"},
1227
+ },
1228
+ },
1229
+ "name": "stream1",
1230
+ "source_defined_cursor": True,
1231
+ "supported_sync_modes": ["full_refresh", "incremental"],
1232
+ }
1233
+ ]
1234
+ }
1235
+ )
1236
+ .set_expected_records(
1237
+ [
1238
+ # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
1239
+ # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
1240
+ {
1241
+ "data": {
1242
+ "col1": "val11b",
1243
+ "col2": "val12b",
1244
+ "col3": "val13b",
1245
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1246
+ "_ab_source_file_url": "b.csv",
1247
+ },
1248
+ "stream": "stream1",
1249
+ },
1250
+ {
1251
+ "data": {
1252
+ "col1": "val21b",
1253
+ "col2": "val22b",
1254
+ "col3": "val23b",
1255
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1256
+ "_ab_source_file_url": "b.csv",
1257
+ },
1258
+ "stream": "stream1",
1259
+ },
1260
+ # {"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c"}, "stream": "stream1"}, # this file is skipped
1261
+ # {"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c"}, "stream": "stream1"}, # this file is skipped
1262
+ {
1263
+ "stream1": {
1264
+ "history": {
1265
+ "a.csv": "2023-06-05T03:54:07.000000Z",
1266
+ "b.csv": "2023-06-05T03:54:07.000000Z",
1267
+ "c.csv": "2023-06-06T03:54:07.000000Z",
1268
+ },
1269
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
1270
+ }
1271
+ },
1272
+ ]
1273
+ )
1274
+ .set_incremental_scenario_config(
1275
+ IncrementalScenarioConfig(
1276
+ input_state=StateBuilder().with_stream_state(
1277
+ "stream1",
1278
+ {
1279
+ "history": {"a.csv": "2023-06-05T03:54:07.000000Z", "c.csv": "2023-06-06T03:54:07.000000Z"},
1280
+ "_ab_source_file_last_modified": "2023-06-03T03:54:07.000000Z_x.csv",
1281
+ }
1282
+ ).build()
1283
+ )
1284
+ )
1285
+ ).build()
1286
+
1287
+ multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_newer = (
1288
+ TestScenarioBuilder()
1289
+ .set_name("multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_newer")
1290
+ .set_config(
1291
+ {
1292
+ "streams": [
1293
+ {
1294
+ "name": "stream1",
1295
+ "format": {"filetype": "csv"},
1296
+ "globs": ["*.csv"],
1297
+ "validation_policy": "Emit Record",
1298
+ }
1299
+ ]
1300
+ }
1301
+ )
1302
+ .set_source_builder(
1303
+ FileBasedSourceBuilder()
1304
+ .set_files(
1305
+ {
1306
+ "a.csv": {
1307
+ "contents": [
1308
+ ("col1", "col2"),
1309
+ ("val11a", "val12a"),
1310
+ ("val21a", "val22a"),
1311
+ ],
1312
+ "last_modified": "2023-06-06T03:54:07.000000Z",
1313
+ },
1314
+ "b.csv": {
1315
+ "contents": [
1316
+ ("col1", "col2", "col3"),
1317
+ ("val11b", "val12b", "val13b"),
1318
+ ("val21b", "val22b", "val23b"),
1319
+ ],
1320
+ "last_modified": "2023-06-07T03:54:07.000000Z",
1321
+ },
1322
+ "c.csv": {
1323
+ "contents": [
1324
+ ("col1", "col2", "col3"),
1325
+ ("val11c", "val12c", "val13c"),
1326
+ ("val21c", "val22c", "val23c"),
1327
+ ],
1328
+ "last_modified": "2023-06-10T03:54:07.000000Z",
1329
+ },
1330
+ }
1331
+ )
1332
+ .set_file_type("csv")
1333
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1334
+ )
1335
+ .set_expected_catalog(
1336
+ {
1337
+ "streams": [
1338
+ {
1339
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1340
+ "json_schema": {
1341
+ "type": "object",
1342
+ "properties": {
1343
+ "col1": {
1344
+ "type": ["null", "string"],
1345
+ },
1346
+ "col2": {
1347
+ "type": ["null", "string"],
1348
+ },
1349
+ "col3": {
1350
+ "type": ["null", "string"],
1351
+ },
1352
+ "_ab_source_file_last_modified": {"type": "string"},
1353
+ "_ab_source_file_url": {"type": "string"},
1354
+ },
1355
+ },
1356
+ "name": "stream1",
1357
+ "source_defined_cursor": True,
1358
+ "supported_sync_modes": ["full_refresh", "incremental"],
1359
+ }
1360
+ ]
1361
+ }
1362
+ )
1363
+ .set_expected_records(
1364
+ [
1365
+ {
1366
+ "data": {
1367
+ "col1": "val11a",
1368
+ "col2": "val12a",
1369
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1370
+ "_ab_source_file_url": "a.csv",
1371
+ },
1372
+ "stream": "stream1",
1373
+ },
1374
+ {
1375
+ "data": {
1376
+ "col1": "val21a",
1377
+ "col2": "val22a",
1378
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1379
+ "_ab_source_file_url": "a.csv",
1380
+ },
1381
+ "stream": "stream1",
1382
+ },
1383
+ {
1384
+ "stream1": {
1385
+ "history": {
1386
+ "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1387
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1388
+ "a.csv": "2023-06-06T03:54:07.000000Z",
1389
+ },
1390
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
1391
+ }
1392
+ },
1393
+ {
1394
+ "data": {
1395
+ "col1": "val11b",
1396
+ "col2": "val12b",
1397
+ "col3": "val13b",
1398
+ "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1399
+ "_ab_source_file_url": "b.csv",
1400
+ },
1401
+ "stream": "stream1",
1402
+ },
1403
+ {
1404
+ "data": {
1405
+ "col1": "val21b",
1406
+ "col2": "val22b",
1407
+ "col3": "val23b",
1408
+ "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1409
+ "_ab_source_file_url": "b.csv",
1410
+ },
1411
+ "stream": "stream1",
1412
+ },
1413
+ {
1414
+ "stream1": {
1415
+ "history": {
1416
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1417
+ "a.csv": "2023-06-06T03:54:07.000000Z",
1418
+ "b.csv": "2023-06-07T03:54:07.000000Z",
1419
+ },
1420
+ "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z_b.csv",
1421
+ }
1422
+ },
1423
+ {
1424
+ "data": {
1425
+ "col1": "val11c",
1426
+ "col2": "val12c",
1427
+ "col3": "val13c",
1428
+ "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1429
+ "_ab_source_file_url": "c.csv",
1430
+ },
1431
+ "stream": "stream1",
1432
+ },
1433
+ {
1434
+ "data": {
1435
+ "col1": "val21c",
1436
+ "col2": "val22c",
1437
+ "col3": "val23c",
1438
+ "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1439
+ "_ab_source_file_url": "c.csv",
1440
+ },
1441
+ "stream": "stream1",
1442
+ },
1443
+ {
1444
+ "stream1": {
1445
+ "history": {
1446
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1447
+ "b.csv": "2023-06-07T03:54:07.000000Z",
1448
+ "c.csv": "2023-06-10T03:54:07.000000Z",
1449
+ },
1450
+ "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z_c.csv",
1451
+ }
1452
+ },
1453
+ ]
1454
+ )
1455
+ .set_incremental_scenario_config(
1456
+ IncrementalScenarioConfig(
1457
+ input_state=StateBuilder().with_stream_state(
1458
+ "stream1",
1459
+ {
1460
+ "history": {
1461
+ "very_very_old_file.csv": "2023-06-01T03:54:07.000000Z",
1462
+ "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1463
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1464
+ },
1465
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
1466
+ }
1467
+ ).build(),
1468
+ )
1469
+ )
1470
+ ).build()
1471
+
1472
+ multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_older = (
1473
+ TestScenarioBuilder()
1474
+ .set_name("multi_csv_remove_old_files_if_history_is_full_scenario_concurrent_cursor_is_older")
1475
+ .set_config(
1476
+ {
1477
+ "streams": [
1478
+ {
1479
+ "name": "stream1",
1480
+ "format": {"filetype": "csv"},
1481
+ "globs": ["*.csv"],
1482
+ "validation_policy": "Emit Record",
1483
+ }
1484
+ ]
1485
+ }
1486
+ )
1487
+ .set_source_builder(
1488
+ FileBasedSourceBuilder()
1489
+ .set_files(
1490
+ {
1491
+ "a.csv": {
1492
+ "contents": [
1493
+ ("col1", "col2"),
1494
+ ("val11a", "val12a"),
1495
+ ("val21a", "val22a"),
1496
+ ],
1497
+ "last_modified": "2023-06-06T03:54:07.000000Z",
1498
+ },
1499
+ "b.csv": {
1500
+ "contents": [
1501
+ ("col1", "col2", "col3"),
1502
+ ("val11b", "val12b", "val13b"),
1503
+ ("val21b", "val22b", "val23b"),
1504
+ ],
1505
+ "last_modified": "2023-06-07T03:54:07.000000Z",
1506
+ },
1507
+ "c.csv": {
1508
+ "contents": [
1509
+ ("col1", "col2", "col3"),
1510
+ ("val11c", "val12c", "val13c"),
1511
+ ("val21c", "val22c", "val23c"),
1512
+ ],
1513
+ "last_modified": "2023-06-10T03:54:07.000000Z",
1514
+ },
1515
+ }
1516
+ )
1517
+ .set_file_type("csv")
1518
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1519
+ )
1520
+ .set_expected_catalog(
1521
+ {
1522
+ "streams": [
1523
+ {
1524
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1525
+ "json_schema": {
1526
+ "type": "object",
1527
+ "properties": {
1528
+ "col1": {
1529
+ "type": ["null", "string"],
1530
+ },
1531
+ "col2": {
1532
+ "type": ["null", "string"],
1533
+ },
1534
+ "col3": {
1535
+ "type": ["null", "string"],
1536
+ },
1537
+ "_ab_source_file_last_modified": {"type": "string"},
1538
+ "_ab_source_file_url": {"type": "string"},
1539
+ },
1540
+ },
1541
+ "name": "stream1",
1542
+ "source_defined_cursor": True,
1543
+ "supported_sync_modes": ["full_refresh", "incremental"],
1544
+ }
1545
+ ]
1546
+ }
1547
+ )
1548
+ .set_expected_records(
1549
+ [
1550
+ {
1551
+ "data": {
1552
+ "col1": "val11a",
1553
+ "col2": "val12a",
1554
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1555
+ "_ab_source_file_url": "a.csv",
1556
+ },
1557
+ "stream": "stream1",
1558
+ },
1559
+ {
1560
+ "data": {
1561
+ "col1": "val21a",
1562
+ "col2": "val22a",
1563
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
1564
+ "_ab_source_file_url": "a.csv",
1565
+ },
1566
+ "stream": "stream1",
1567
+ },
1568
+ {
1569
+ "stream1": {
1570
+ "history": {
1571
+ "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1572
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1573
+ "a.csv": "2023-06-06T03:54:07.000000Z",
1574
+ },
1575
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
1576
+ }
1577
+ },
1578
+ {
1579
+ "data": {
1580
+ "col1": "val11b",
1581
+ "col2": "val12b",
1582
+ "col3": "val13b",
1583
+ "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1584
+ "_ab_source_file_url": "b.csv",
1585
+ },
1586
+ "stream": "stream1",
1587
+ },
1588
+ {
1589
+ "data": {
1590
+ "col1": "val21b",
1591
+ "col2": "val22b",
1592
+ "col3": "val23b",
1593
+ "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
1594
+ "_ab_source_file_url": "b.csv",
1595
+ },
1596
+ "stream": "stream1",
1597
+ },
1598
+ {
1599
+ "stream1": {
1600
+ "history": {
1601
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1602
+ "a.csv": "2023-06-06T03:54:07.000000Z",
1603
+ "b.csv": "2023-06-07T03:54:07.000000Z",
1604
+ },
1605
+ "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z_b.csv",
1606
+ }
1607
+ },
1608
+ {
1609
+ "data": {
1610
+ "col1": "val11c",
1611
+ "col2": "val12c",
1612
+ "col3": "val13c",
1613
+ "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1614
+ "_ab_source_file_url": "c.csv",
1615
+ },
1616
+ "stream": "stream1",
1617
+ },
1618
+ {
1619
+ "data": {
1620
+ "col1": "val21c",
1621
+ "col2": "val22c",
1622
+ "col3": "val23c",
1623
+ "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
1624
+ "_ab_source_file_url": "c.csv",
1625
+ },
1626
+ "stream": "stream1",
1627
+ },
1628
+ {
1629
+ "stream1": {
1630
+ "history": {
1631
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1632
+ "b.csv": "2023-06-07T03:54:07.000000Z",
1633
+ "c.csv": "2023-06-10T03:54:07.000000Z",
1634
+ },
1635
+ "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z_c.csv",
1636
+ }
1637
+ },
1638
+ ]
1639
+ )
1640
+ .set_incremental_scenario_config(
1641
+ IncrementalScenarioConfig(
1642
+ input_state=StateBuilder().with_stream_state(
1643
+ "stream1",
1644
+ {
1645
+ "history": {
1646
+ "very_very_old_file.csv": "2023-06-01T03:54:07.000000Z",
1647
+ "very_old_file.csv": "2023-06-02T03:54:07.000000Z",
1648
+ "old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
1649
+ },
1650
+ "_ab_source_file_last_modified": "2023-05-01T03:54:07.000000Z_very_very_very_old_file.csv",
1651
+ }
1652
+ ).build(),
1653
+ )
1654
+ )
1655
+ ).build()
1656
+
1657
+ multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_newer = (
1658
+ TestScenarioBuilder()
1659
+ .set_name("multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_newer")
1660
+ .set_config(
1661
+ {
1662
+ "streams": [
1663
+ {
1664
+ "name": "stream1",
1665
+ "format": {"filetype": "csv"},
1666
+ "globs": ["*.csv"],
1667
+ "validation_policy": "Emit Record",
1668
+ "days_to_sync_if_history_is_full": 3,
1669
+ }
1670
+ ]
1671
+ }
1672
+ )
1673
+ .set_source_builder(
1674
+ FileBasedSourceBuilder()
1675
+ .set_files(
1676
+ {
1677
+ "b.csv": {
1678
+ "contents": [
1679
+ ("col1", "col2", "col3"),
1680
+ ("val11b", "val12b", "val13b"),
1681
+ ("val21b", "val22b", "val23b"),
1682
+ ],
1683
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1684
+ },
1685
+ "a.csv": {
1686
+ "contents": [
1687
+ ("col1", "col2"),
1688
+ ("val11a", "val12a"),
1689
+ ("val21a", "val22a"),
1690
+ ],
1691
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1692
+ },
1693
+ "c.csv": {
1694
+ "contents": [
1695
+ ("col1", "col2", "col3"),
1696
+ ("val11c", "val12c", "val13c"),
1697
+ ("val21c", "val22c", "val23c"),
1698
+ ],
1699
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1700
+ },
1701
+ "d.csv": {
1702
+ "contents": [
1703
+ ("col1", "col2", "col3"),
1704
+ ("val11d", "val12d", "val13d"),
1705
+ ("val21d", "val22d", "val23d"),
1706
+ ],
1707
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1708
+ },
1709
+ }
1710
+ )
1711
+ .set_file_type("csv")
1712
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1713
+ )
1714
+ .set_expected_catalog(
1715
+ {
1716
+ "streams": [
1717
+ {
1718
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1719
+ "json_schema": {
1720
+ "type": "object",
1721
+ "properties": {
1722
+ "col1": {
1723
+ "type": ["null", "string"],
1724
+ },
1725
+ "col2": {
1726
+ "type": ["null", "string"],
1727
+ },
1728
+ "col3": {
1729
+ "type": ["null", "string"],
1730
+ },
1731
+ "_ab_source_file_last_modified": {"type": "string"},
1732
+ "_ab_source_file_url": {"type": "string"},
1733
+ },
1734
+ },
1735
+ "name": "stream1",
1736
+ "source_defined_cursor": True,
1737
+ "supported_sync_modes": ["full_refresh", "incremental"],
1738
+ }
1739
+ ]
1740
+ }
1741
+ )
1742
+ .set_expected_records(
1743
+ [
1744
+ {
1745
+ "data": {
1746
+ "col1": "val11a",
1747
+ "col2": "val12a",
1748
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1749
+ "_ab_source_file_url": "a.csv",
1750
+ },
1751
+ "stream": "stream1",
1752
+ },
1753
+ {
1754
+ "data": {
1755
+ "col1": "val21a",
1756
+ "col2": "val22a",
1757
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1758
+ "_ab_source_file_url": "a.csv",
1759
+ },
1760
+ "stream": "stream1",
1761
+ },
1762
+ {
1763
+ "data": {
1764
+ "col1": "val11b",
1765
+ "col2": "val12b",
1766
+ "col3": "val13b",
1767
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1768
+ "_ab_source_file_url": "b.csv",
1769
+ },
1770
+ "stream": "stream1",
1771
+ },
1772
+ {
1773
+ "data": {
1774
+ "col1": "val21b",
1775
+ "col2": "val22b",
1776
+ "col3": "val23b",
1777
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1778
+ "_ab_source_file_url": "b.csv",
1779
+ },
1780
+ "stream": "stream1",
1781
+ },
1782
+ {
1783
+ "data": {
1784
+ "col1": "val11c",
1785
+ "col2": "val12c",
1786
+ "col3": "val13c",
1787
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1788
+ "_ab_source_file_url": "c.csv",
1789
+ },
1790
+ "stream": "stream1",
1791
+ },
1792
+ {
1793
+ "data": {
1794
+ "col1": "val21c",
1795
+ "col2": "val22c",
1796
+ "col3": "val23c",
1797
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1798
+ "_ab_source_file_url": "c.csv",
1799
+ },
1800
+ "stream": "stream1",
1801
+ },
1802
+ {
1803
+ "data": {
1804
+ "col1": "val11d",
1805
+ "col2": "val12d",
1806
+ "col3": "val13d",
1807
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1808
+ "_ab_source_file_url": "d.csv",
1809
+ },
1810
+ "stream": "stream1",
1811
+ },
1812
+ {
1813
+ "data": {
1814
+ "col1": "val21d",
1815
+ "col2": "val22d",
1816
+ "col3": "val23d",
1817
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1818
+ "_ab_source_file_url": "d.csv",
1819
+ },
1820
+ "stream": "stream1",
1821
+ },
1822
+ {
1823
+ "stream1": {
1824
+ "history": {
1825
+ "b.csv": "2023-06-05T03:54:07.000000Z",
1826
+ "c.csv": "2023-06-05T03:54:07.000000Z",
1827
+ "d.csv": "2023-06-05T03:54:07.000000Z",
1828
+ },
1829
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
1830
+ }
1831
+ },
1832
+ ]
1833
+ )
1834
+ .set_incremental_scenario_config(
1835
+ IncrementalScenarioConfig(
1836
+ input_state=[],
1837
+ )
1838
+ )
1839
+ ).build()
1840
+
1841
+ multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_older = (
1842
+ TestScenarioBuilder()
1843
+ .set_name("multi_csv_same_timestamp_more_files_than_history_size_scenario_concurrent_cursor_is_older")
1844
+ .set_config(
1845
+ {
1846
+ "streams": [
1847
+ {
1848
+ "name": "stream1",
1849
+ "format": {"filetype": "csv"},
1850
+ "globs": ["*.csv"],
1851
+ "validation_policy": "Emit Record",
1852
+ "days_to_sync_if_history_is_full": 3,
1853
+ }
1854
+ ]
1855
+ }
1856
+ )
1857
+ .set_source_builder(
1858
+ FileBasedSourceBuilder()
1859
+ .set_files(
1860
+ {
1861
+ "b.csv": {
1862
+ "contents": [
1863
+ ("col1", "col2", "col3"),
1864
+ ("val11b", "val12b", "val13b"),
1865
+ ("val21b", "val22b", "val23b"),
1866
+ ],
1867
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1868
+ },
1869
+ "a.csv": {
1870
+ "contents": [
1871
+ ("col1", "col2"),
1872
+ ("val11a", "val12a"),
1873
+ ("val21a", "val22a"),
1874
+ ],
1875
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1876
+ },
1877
+ "c.csv": {
1878
+ "contents": [
1879
+ ("col1", "col2", "col3"),
1880
+ ("val11c", "val12c", "val13c"),
1881
+ ("val21c", "val22c", "val23c"),
1882
+ ],
1883
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1884
+ },
1885
+ "d.csv": {
1886
+ "contents": [
1887
+ ("col1", "col2", "col3"),
1888
+ ("val11d", "val12d", "val13d"),
1889
+ ("val21d", "val22d", "val23d"),
1890
+ ],
1891
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1892
+ },
1893
+ }
1894
+ )
1895
+ .set_file_type("csv")
1896
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
1897
+ )
1898
+ .set_expected_catalog(
1899
+ {
1900
+ "streams": [
1901
+ {
1902
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1903
+ "json_schema": {
1904
+ "type": "object",
1905
+ "properties": {
1906
+ "col1": {
1907
+ "type": ["null", "string"],
1908
+ },
1909
+ "col2": {
1910
+ "type": ["null", "string"],
1911
+ },
1912
+ "col3": {
1913
+ "type": ["null", "string"],
1914
+ },
1915
+ "_ab_source_file_last_modified": {"type": "string"},
1916
+ "_ab_source_file_url": {"type": "string"},
1917
+ },
1918
+ },
1919
+ "name": "stream1",
1920
+ "source_defined_cursor": True,
1921
+ "supported_sync_modes": ["full_refresh", "incremental"],
1922
+ }
1923
+ ]
1924
+ }
1925
+ )
1926
+ .set_expected_records(
1927
+ [
1928
+ {
1929
+ "data": {
1930
+ "col1": "val11a",
1931
+ "col2": "val12a",
1932
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1933
+ "_ab_source_file_url": "a.csv",
1934
+ },
1935
+ "stream": "stream1",
1936
+ },
1937
+ {
1938
+ "data": {
1939
+ "col1": "val21a",
1940
+ "col2": "val22a",
1941
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1942
+ "_ab_source_file_url": "a.csv",
1943
+ },
1944
+ "stream": "stream1",
1945
+ },
1946
+ {
1947
+ "data": {
1948
+ "col1": "val11b",
1949
+ "col2": "val12b",
1950
+ "col3": "val13b",
1951
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1952
+ "_ab_source_file_url": "b.csv",
1953
+ },
1954
+ "stream": "stream1",
1955
+ },
1956
+ {
1957
+ "data": {
1958
+ "col1": "val21b",
1959
+ "col2": "val22b",
1960
+ "col3": "val23b",
1961
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1962
+ "_ab_source_file_url": "b.csv",
1963
+ },
1964
+ "stream": "stream1",
1965
+ },
1966
+ {
1967
+ "data": {
1968
+ "col1": "val11c",
1969
+ "col2": "val12c",
1970
+ "col3": "val13c",
1971
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1972
+ "_ab_source_file_url": "c.csv",
1973
+ },
1974
+ "stream": "stream1",
1975
+ },
1976
+ {
1977
+ "data": {
1978
+ "col1": "val21c",
1979
+ "col2": "val22c",
1980
+ "col3": "val23c",
1981
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1982
+ "_ab_source_file_url": "c.csv",
1983
+ },
1984
+ "stream": "stream1",
1985
+ },
1986
+ {
1987
+ "data": {
1988
+ "col1": "val11d",
1989
+ "col2": "val12d",
1990
+ "col3": "val13d",
1991
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1992
+ "_ab_source_file_url": "d.csv",
1993
+ },
1994
+ "stream": "stream1",
1995
+ },
1996
+ {
1997
+ "data": {
1998
+ "col1": "val21d",
1999
+ "col2": "val22d",
2000
+ "col3": "val23d",
2001
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2002
+ "_ab_source_file_url": "d.csv",
2003
+ },
2004
+ "stream": "stream1",
2005
+ },
2006
+ {
2007
+ "stream1": {
2008
+ "history": {
2009
+ "b.csv": "2023-06-05T03:54:07.000000Z",
2010
+ "c.csv": "2023-06-05T03:54:07.000000Z",
2011
+ "d.csv": "2023-06-05T03:54:07.000000Z",
2012
+ },
2013
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
2014
+ }
2015
+ },
2016
+ ]
2017
+ )
2018
+ .set_incremental_scenario_config(
2019
+ IncrementalScenarioConfig(
2020
+ input_state=[],
2021
+ )
2022
+ )
2023
+ ).build()
2024
+
2025
+ multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_older = (
2026
+ TestScenarioBuilder()
2027
+ .set_name("multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_older")
2028
+ .set_config(
2029
+ {
2030
+ "streams": [
2031
+ {
2032
+ "name": "stream1",
2033
+ "format": {"filetype": "csv"},
2034
+ "globs": ["*.csv"],
2035
+ "validation_policy": "Emit Record",
2036
+ "days_to_sync_if_history_is_full": 3,
2037
+ }
2038
+ ]
2039
+ }
2040
+ )
2041
+ .set_source_builder(
2042
+ FileBasedSourceBuilder()
2043
+ .set_files(
2044
+ {
2045
+ "a.csv": {
2046
+ "contents": [
2047
+ ("col1", "col2"),
2048
+ ("val11a", "val12a"),
2049
+ ("val21a", "val22a"),
2050
+ ],
2051
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2052
+ },
2053
+ "b.csv": {
2054
+ "contents": [
2055
+ ("col1", "col2", "col3"),
2056
+ ("val11b", "val12b", "val13b"),
2057
+ ("val21b", "val22b", "val23b"),
2058
+ ],
2059
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2060
+ },
2061
+ "c.csv": {
2062
+ "contents": [
2063
+ ("col1", "col2", "col3"),
2064
+ ("val11c", "val12c", "val13c"),
2065
+ ("val21c", "val22c", "val23c"),
2066
+ ],
2067
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2068
+ },
2069
+ "d.csv": {
2070
+ "contents": [
2071
+ ("col1", "col2", "col3"),
2072
+ ("val11d", "val12d", "val13d"),
2073
+ ("val21d", "val22d", "val23d"),
2074
+ ],
2075
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2076
+ },
2077
+ }
2078
+ )
2079
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2080
+ .set_file_type("csv")
2081
+ )
2082
+ .set_expected_catalog(
2083
+ {
2084
+ "streams": [
2085
+ {
2086
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2087
+ "json_schema": {
2088
+ "type": "object",
2089
+ "properties": {
2090
+ "col1": {
2091
+ "type": ["null", "string"],
2092
+ },
2093
+ "col2": {
2094
+ "type": ["null", "string"],
2095
+ },
2096
+ "col3": {
2097
+ "type": ["null", "string"],
2098
+ },
2099
+ "_ab_source_file_last_modified": {"type": "string"},
2100
+ "_ab_source_file_url": {"type": "string"},
2101
+ },
2102
+ },
2103
+ "name": "stream1",
2104
+ "source_defined_cursor": True,
2105
+ "supported_sync_modes": ["full_refresh", "incremental"],
2106
+ }
2107
+ ]
2108
+ }
2109
+ )
2110
+ .set_expected_records(
2111
+ [
2112
+ {
2113
+ "stream1": {
2114
+ "history": {
2115
+ "b.csv": "2023-06-05T03:54:07.000000Z",
2116
+ "c.csv": "2023-06-05T03:54:07.000000Z",
2117
+ "d.csv": "2023-06-05T03:54:07.000000Z",
2118
+ },
2119
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
2120
+ }
2121
+ }
2122
+ ]
2123
+ )
2124
+ .set_incremental_scenario_config(
2125
+ IncrementalScenarioConfig(
2126
+ input_state=StateBuilder().with_stream_state(
2127
+ "stream1",
2128
+ {
2129
+ "history": {
2130
+ "b.csv": "2023-06-05T03:54:07.000000Z",
2131
+ "c.csv": "2023-06-05T03:54:07.000000Z",
2132
+ "d.csv": "2023-06-05T03:54:07.000000Z",
2133
+ },
2134
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
2135
+ }
2136
+ ).build(),
2137
+ )
2138
+ )
2139
+ ).build()
2140
+
2141
+ multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_newer = (
2142
+ TestScenarioBuilder()
2143
+ .set_name("multi_csv_sync_recent_files_if_history_is_incomplete_scenario_concurrent_cursor_is_newer")
2144
+ .set_config(
2145
+ {
2146
+ "streams": [
2147
+ {
2148
+ "name": "stream1",
2149
+ "format": {"filetype": "csv"},
2150
+ "globs": ["*.csv"],
2151
+ "validation_policy": "Emit Record",
2152
+ "days_to_sync_if_history_is_full": 3,
2153
+ }
2154
+ ]
2155
+ }
2156
+ )
2157
+ .set_source_builder(
2158
+ FileBasedSourceBuilder()
2159
+ .set_files(
2160
+ {
2161
+ "a.csv": {
2162
+ "contents": [
2163
+ ("col1", "col2"),
2164
+ ("val11a", "val12a"),
2165
+ ("val21a", "val22a"),
2166
+ ],
2167
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2168
+ },
2169
+ "b.csv": {
2170
+ "contents": [
2171
+ ("col1", "col2", "col3"),
2172
+ ("val11b", "val12b", "val13b"),
2173
+ ("val21b", "val22b", "val23b"),
2174
+ ],
2175
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2176
+ },
2177
+ "c.csv": {
2178
+ "contents": [
2179
+ ("col1", "col2", "col3"),
2180
+ ("val11c", "val12c", "val13c"),
2181
+ ("val21c", "val22c", "val23c"),
2182
+ ],
2183
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2184
+ },
2185
+ "d.csv": {
2186
+ "contents": [
2187
+ ("col1", "col2", "col3"),
2188
+ ("val11d", "val12d", "val13d"),
2189
+ ("val21d", "val22d", "val23d"),
2190
+ ],
2191
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2192
+ },
2193
+ }
2194
+ )
2195
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2196
+ .set_file_type("csv")
2197
+ )
2198
+ .set_expected_catalog(
2199
+ {
2200
+ "streams": [
2201
+ {
2202
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2203
+ "json_schema": {
2204
+ "type": "object",
2205
+ "properties": {
2206
+ "col1": {
2207
+ "type": ["null", "string"],
2208
+ },
2209
+ "col2": {
2210
+ "type": ["null", "string"],
2211
+ },
2212
+ "col3": {
2213
+ "type": ["null", "string"],
2214
+ },
2215
+ "_ab_source_file_last_modified": {"type": "string"},
2216
+ "_ab_source_file_url": {"type": "string"},
2217
+ },
2218
+ },
2219
+ "name": "stream1",
2220
+ "source_defined_cursor": True,
2221
+ "supported_sync_modes": ["full_refresh", "incremental"],
2222
+ }
2223
+ ]
2224
+ }
2225
+ )
2226
+ .set_expected_records(
2227
+ [
2228
+ {
2229
+ "stream1": {
2230
+ "history": {
2231
+ "b.csv": "2023-06-05T03:54:07.000000Z",
2232
+ "c.csv": "2023-06-05T03:54:07.000000Z",
2233
+ "d.csv": "2023-06-05T03:54:07.000000Z",
2234
+ },
2235
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
2236
+ }
2237
+ }
2238
+ ]
2239
+ )
2240
+ .set_incremental_scenario_config(
2241
+ IncrementalScenarioConfig(
2242
+ input_state=StateBuilder().with_stream_state(
2243
+ "stream1",
2244
+ {
2245
+ "history": {
2246
+ "b.csv": "2023-06-05T03:54:07.000000Z",
2247
+ "c.csv": "2023-06-05T03:54:07.000000Z",
2248
+ "d.csv": "2023-06-05T03:54:07.000000Z",
2249
+ },
2250
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
2251
+ }
2252
+ ).build(),
2253
+ )
2254
+ )
2255
+ ).build()
2256
+
2257
+
2258
+ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_older = (
2259
+ TestScenarioBuilder()
2260
+ .set_name("multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_older")
2261
+ .set_config(
2262
+ {
2263
+ "streams": [
2264
+ {
2265
+ "name": "stream1",
2266
+ "format": {"filetype": "csv"},
2267
+ "globs": ["*.csv"],
2268
+ "validation_policy": "Emit Record",
2269
+ "days_to_sync_if_history_is_full": 3,
2270
+ }
2271
+ ]
2272
+ }
2273
+ )
2274
+ .set_source_builder(
2275
+ FileBasedSourceBuilder()
2276
+ .set_files(
2277
+ {
2278
+ "a.csv": {
2279
+ "contents": [
2280
+ ("col1", "col2"),
2281
+ ("val11a", "val12a"),
2282
+ ("val21a", "val22a"),
2283
+ ],
2284
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2285
+ },
2286
+ "b.csv": {
2287
+ "contents": [
2288
+ ("col1", "col2", "col3"),
2289
+ ("val11b", "val12b", "val13b"),
2290
+ ("val21b", "val22b", "val23b"),
2291
+ ],
2292
+ "last_modified": "2023-06-06T03:54:07.000000Z",
2293
+ },
2294
+ "c.csv": {
2295
+ "contents": [
2296
+ ("col1", "col2", "col3"),
2297
+ ("val11c", "val12c", "val13c"),
2298
+ ("val21c", "val22c", "val23c"),
2299
+ ],
2300
+ "last_modified": "2023-06-07T03:54:07.000000Z",
2301
+ },
2302
+ "d.csv": {
2303
+ "contents": [
2304
+ ("col1", "col2", "col3"),
2305
+ ("val11d", "val12d", "val13d"),
2306
+ ("val21d", "val22d", "val23d"),
2307
+ ],
2308
+ "last_modified": "2023-06-08T03:54:07.000000Z",
2309
+ },
2310
+ }
2311
+ )
2312
+ .set_file_type("csv")
2313
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2314
+ )
2315
+ .set_expected_catalog(
2316
+ {
2317
+ "streams": [
2318
+ {
2319
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2320
+ "json_schema": {
2321
+ "type": "object",
2322
+ "properties": {
2323
+ "col1": {
2324
+ "type": ["null", "string"],
2325
+ },
2326
+ "col2": {
2327
+ "type": ["null", "string"],
2328
+ },
2329
+ "col3": {
2330
+ "type": ["null", "string"],
2331
+ },
2332
+ "_ab_source_file_last_modified": {"type": "string"},
2333
+ "_ab_source_file_url": {"type": "string"},
2334
+ },
2335
+ },
2336
+ "name": "stream1",
2337
+ "source_defined_cursor": True,
2338
+ "supported_sync_modes": ["full_refresh", "incremental"],
2339
+ }
2340
+ ]
2341
+ }
2342
+ )
2343
+ .set_expected_records(
2344
+ [
2345
+ # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # This file is skipped because it is older than the time_window
2346
+ # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"},
2347
+ {
2348
+ "data": {
2349
+ "col1": "val11b",
2350
+ "col2": "val12b",
2351
+ "col3": "val13b",
2352
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2353
+ "_ab_source_file_url": "b.csv",
2354
+ },
2355
+ "stream": "stream1",
2356
+ },
2357
+ {
2358
+ "data": {
2359
+ "col1": "val21b",
2360
+ "col2": "val22b",
2361
+ "col3": "val23b",
2362
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2363
+ "_ab_source_file_url": "b.csv",
2364
+ },
2365
+ "stream": "stream1",
2366
+ },
2367
+ {
2368
+ "stream1": {
2369
+ "history": {
2370
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2371
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2372
+ "e.csv": "2023-06-08T03:54:07.000000Z",
2373
+ },
2374
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2375
+ }
2376
+ },
2377
+ ]
2378
+ )
2379
+ .set_incremental_scenario_config(
2380
+ IncrementalScenarioConfig(
2381
+ input_state=StateBuilder().with_stream_state(
2382
+ "stream1",
2383
+ {
2384
+ "history": {
2385
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2386
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2387
+ "e.csv": "2023-06-08T03:54:07.000000Z",
2388
+ },
2389
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2390
+ }
2391
+ ).build(),
2392
+ )
2393
+ )
2394
+ ).build()
2395
+
2396
+ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_newer = (
2397
+ TestScenarioBuilder()
2398
+ .set_name("multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_timestamps_scenario_concurrent_cursor_is_newer")
2399
+ .set_config(
2400
+ {
2401
+ "streams": [
2402
+ {
2403
+ "name": "stream1",
2404
+ "format": {"filetype": "csv"},
2405
+ "globs": ["*.csv"],
2406
+ "validation_policy": "Emit Record",
2407
+ "days_to_sync_if_history_is_full": 3,
2408
+ }
2409
+ ]
2410
+ }
2411
+ )
2412
+ .set_source_builder(
2413
+ FileBasedSourceBuilder()
2414
+ .set_files(
2415
+ {
2416
+ "a.csv": {
2417
+ "contents": [
2418
+ ("col1", "col2"),
2419
+ ("val11a", "val12a"),
2420
+ ("val21a", "val22a"),
2421
+ ],
2422
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2423
+ },
2424
+ "b.csv": {
2425
+ "contents": [
2426
+ ("col1", "col2", "col3"),
2427
+ ("val11b", "val12b", "val13b"),
2428
+ ("val21b", "val22b", "val23b"),
2429
+ ],
2430
+ "last_modified": "2023-06-06T03:54:07.000000Z",
2431
+ },
2432
+ "c.csv": {
2433
+ "contents": [
2434
+ ("col1", "col2", "col3"),
2435
+ ("val11c", "val12c", "val13c"),
2436
+ ("val21c", "val22c", "val23c"),
2437
+ ],
2438
+ "last_modified": "2023-06-07T03:54:07.000000Z",
2439
+ },
2440
+ "d.csv": {
2441
+ "contents": [
2442
+ ("col1", "col2", "col3"),
2443
+ ("val11d", "val12d", "val13d"),
2444
+ ("val21d", "val22d", "val23d"),
2445
+ ],
2446
+ "last_modified": "2023-06-08T03:54:07.000000Z",
2447
+ },
2448
+ }
2449
+ )
2450
+ .set_file_type("csv")
2451
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2452
+ )
2453
+ .set_expected_catalog(
2454
+ {
2455
+ "streams": [
2456
+ {
2457
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2458
+ "json_schema": {
2459
+ "type": "object",
2460
+ "properties": {
2461
+ "col1": {
2462
+ "type": ["null", "string"],
2463
+ },
2464
+ "col2": {
2465
+ "type": ["null", "string"],
2466
+ },
2467
+ "col3": {
2468
+ "type": ["null", "string"],
2469
+ },
2470
+ "_ab_source_file_last_modified": {"type": "string"},
2471
+ "_ab_source_file_url": {"type": "string"},
2472
+ },
2473
+ },
2474
+ "name": "stream1",
2475
+ "source_defined_cursor": True,
2476
+ "supported_sync_modes": ["full_refresh", "incremental"],
2477
+ }
2478
+ ]
2479
+ }
2480
+ )
2481
+ .set_expected_records(
2482
+ [
2483
+ # {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # This file is skipped because it is older than the time_window
2484
+ # {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"},
2485
+ {
2486
+ "data": {
2487
+ "col1": "val11b",
2488
+ "col2": "val12b",
2489
+ "col3": "val13b",
2490
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2491
+ "_ab_source_file_url": "b.csv",
2492
+ },
2493
+ "stream": "stream1",
2494
+ },
2495
+ {
2496
+ "data": {
2497
+ "col1": "val21b",
2498
+ "col2": "val22b",
2499
+ "col3": "val23b",
2500
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2501
+ "_ab_source_file_url": "b.csv",
2502
+ },
2503
+ "stream": "stream1",
2504
+ },
2505
+ {
2506
+ "stream1": {
2507
+ "history": {
2508
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2509
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2510
+ "e.csv": "2023-06-08T03:54:07.000000Z",
2511
+ },
2512
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2513
+ }
2514
+ },
2515
+ ]
2516
+ )
2517
+ .set_incremental_scenario_config(
2518
+ IncrementalScenarioConfig(
2519
+ input_state=StateBuilder().with_stream_state(
2520
+ "stream1",
2521
+ {
2522
+ "history": {
2523
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2524
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2525
+ "e.csv": "2023-06-08T03:54:07.000000Z",
2526
+ },
2527
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
2528
+ }
2529
+ ).build(),
2530
+ )
2531
+ )
2532
+ ).build()
2533
+
2534
+ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_newer = (
2535
+ TestScenarioBuilder()
2536
+ .set_name("multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_newer")
2537
+ .set_config(
2538
+ {
2539
+ "streams": [
2540
+ {
2541
+ "name": "stream1",
2542
+ "format": {"filetype": "csv"},
2543
+ "globs": ["*.csv"],
2544
+ "validation_policy": "Emit Record",
2545
+ "days_to_sync_if_history_is_full": 3,
2546
+ }
2547
+ ]
2548
+ }
2549
+ )
2550
+ .set_source_builder(
2551
+ FileBasedSourceBuilder()
2552
+ .set_files(
2553
+ {
2554
+ "a.csv": {
2555
+ "contents": [
2556
+ ("col1", "col2"),
2557
+ ("val11a", "val12a"),
2558
+ ("val21a", "val22a"),
2559
+ ],
2560
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2561
+ },
2562
+ "b.csv": {
2563
+ "contents": [
2564
+ ("col1", "col2", "col3"),
2565
+ ("val11b", "val12b", "val13b"),
2566
+ ("val21b", "val22b", "val23b"),
2567
+ ],
2568
+ "last_modified": "2023-06-06T03:54:07.000000Z",
2569
+ },
2570
+ "c.csv": {
2571
+ "contents": [
2572
+ ("col1", "col2", "col3"),
2573
+ ("val11c", "val12c", "val13c"),
2574
+ ("val21c", "val22c", "val23c"),
2575
+ ],
2576
+ "last_modified": "2023-06-07T03:54:07.000000Z",
2577
+ },
2578
+ "d.csv": {
2579
+ "contents": [
2580
+ ("col1", "col2", "col3"),
2581
+ ("val11d", "val12d", "val13d"),
2582
+ ("val21d", "val22d", "val23d"),
2583
+ ],
2584
+ "last_modified": "2023-06-08T03:54:07.000000Z",
2585
+ },
2586
+ }
2587
+ )
2588
+ .set_file_type("csv")
2589
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2590
+ )
2591
+ .set_expected_catalog(
2592
+ {
2593
+ "streams": [
2594
+ {
2595
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2596
+ "json_schema": {
2597
+ "type": "object",
2598
+ "properties": {
2599
+ "col1": {
2600
+ "type": ["null", "string"],
2601
+ },
2602
+ "col2": {
2603
+ "type": ["null", "string"],
2604
+ },
2605
+ "col3": {
2606
+ "type": ["null", "string"],
2607
+ },
2608
+ "_ab_source_file_last_modified": {"type": "string"},
2609
+ "_ab_source_file_url": {"type": "string"},
2610
+ },
2611
+ },
2612
+ "name": "stream1",
2613
+ "source_defined_cursor": True,
2614
+ "supported_sync_modes": ["full_refresh", "incremental"],
2615
+ }
2616
+ ]
2617
+ }
2618
+ )
2619
+ .set_expected_records(
2620
+ [
2621
+ {
2622
+ "data": {
2623
+ "col1": "val11a",
2624
+ "col2": "val12a",
2625
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2626
+ "_ab_source_file_url": "a.csv",
2627
+ },
2628
+ "stream": "stream1",
2629
+ },
2630
+ {
2631
+ "data": {
2632
+ "col1": "val21a",
2633
+ "col2": "val22a",
2634
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2635
+ "_ab_source_file_url": "a.csv",
2636
+ },
2637
+ "stream": "stream1",
2638
+ },
2639
+ {
2640
+ "stream1": {
2641
+ "history": {
2642
+ "a.csv": "2023-06-05T03:54:07.000000Z",
2643
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2644
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2645
+ },
2646
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2647
+ }
2648
+ },
2649
+ {
2650
+ "data": {
2651
+ "col1": "val11b",
2652
+ "col2": "val12b",
2653
+ "col3": "val13b",
2654
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2655
+ "_ab_source_file_url": "b.csv",
2656
+ },
2657
+ "stream": "stream1",
2658
+ },
2659
+ {
2660
+ "data": {
2661
+ "col1": "val21b",
2662
+ "col2": "val22b",
2663
+ "col3": "val23b",
2664
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2665
+ "_ab_source_file_url": "b.csv",
2666
+ },
2667
+ "stream": "stream1",
2668
+ },
2669
+ {
2670
+ "stream1": {
2671
+ "history": {
2672
+ "b.csv": "2023-06-06T03:54:07.000000Z",
2673
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2674
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2675
+ },
2676
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2677
+ }
2678
+ },
2679
+ ]
2680
+ )
2681
+ .set_incremental_scenario_config(
2682
+ IncrementalScenarioConfig(
2683
+ input_state=StateBuilder().with_stream_state(
2684
+ "stream1",
2685
+ {
2686
+ "history": {
2687
+ "old_file.csv": "2023-06-05T00:00:00.000000Z",
2688
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2689
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2690
+ },
2691
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2692
+ }
2693
+ ).build(),
2694
+ )
2695
+ )
2696
+ ).build()
2697
+
2698
+ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_older = (
2699
+ TestScenarioBuilder()
2700
+ .set_name("multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_different_timestamps_scenario_concurrent_cursor_is_older")
2701
+ .set_config(
2702
+ {
2703
+ "streams": [
2704
+ {
2705
+ "name": "stream1",
2706
+ "format": {"filetype": "csv"},
2707
+ "globs": ["*.csv"],
2708
+ "validation_policy": "Emit Record",
2709
+ "days_to_sync_if_history_is_full": 3,
2710
+ }
2711
+ ]
2712
+ }
2713
+ )
2714
+ .set_source_builder(
2715
+ FileBasedSourceBuilder()
2716
+ .set_files(
2717
+ {
2718
+ "a.csv": {
2719
+ "contents": [
2720
+ ("col1", "col2"),
2721
+ ("val11a", "val12a"),
2722
+ ("val21a", "val22a"),
2723
+ ],
2724
+ "last_modified": "2023-06-05T03:54:07.000000Z",
2725
+ },
2726
+ "b.csv": {
2727
+ "contents": [
2728
+ ("col1", "col2", "col3"),
2729
+ ("val11b", "val12b", "val13b"),
2730
+ ("val21b", "val22b", "val23b"),
2731
+ ],
2732
+ "last_modified": "2023-06-06T03:54:07.000000Z",
2733
+ },
2734
+ "c.csv": {
2735
+ "contents": [
2736
+ ("col1", "col2", "col3"),
2737
+ ("val11c", "val12c", "val13c"),
2738
+ ("val21c", "val22c", "val23c"),
2739
+ ],
2740
+ "last_modified": "2023-06-07T03:54:07.000000Z",
2741
+ },
2742
+ "d.csv": {
2743
+ "contents": [
2744
+ ("col1", "col2", "col3"),
2745
+ ("val11d", "val12d", "val13d"),
2746
+ ("val21d", "val22d", "val23d"),
2747
+ ],
2748
+ "last_modified": "2023-06-08T03:54:07.000000Z",
2749
+ },
2750
+ }
2751
+ )
2752
+ .set_file_type("csv")
2753
+ .set_cursor_cls(LowHistoryLimitConcurrentCursor)
2754
+ )
2755
+ .set_expected_catalog(
2756
+ {
2757
+ "streams": [
2758
+ {
2759
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2760
+ "json_schema": {
2761
+ "type": "object",
2762
+ "properties": {
2763
+ "col1": {
2764
+ "type": ["null", "string"],
2765
+ },
2766
+ "col2": {
2767
+ "type": ["null", "string"],
2768
+ },
2769
+ "col3": {
2770
+ "type": ["null", "string"],
2771
+ },
2772
+ "_ab_source_file_last_modified": {"type": "string"},
2773
+ "_ab_source_file_url": {"type": "string"},
2774
+ },
2775
+ },
2776
+ "name": "stream1",
2777
+ "source_defined_cursor": True,
2778
+ "supported_sync_modes": ["full_refresh", "incremental"],
2779
+ }
2780
+ ]
2781
+ }
2782
+ )
2783
+ .set_expected_records(
2784
+ [
2785
+ {
2786
+ "data": {
2787
+ "col1": "val11a",
2788
+ "col2": "val12a",
2789
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2790
+ "_ab_source_file_url": "a.csv",
2791
+ },
2792
+ "stream": "stream1",
2793
+ },
2794
+ {
2795
+ "data": {
2796
+ "col1": "val21a",
2797
+ "col2": "val22a",
2798
+ "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2799
+ "_ab_source_file_url": "a.csv",
2800
+ },
2801
+ "stream": "stream1",
2802
+ },
2803
+ {
2804
+ "stream1": {
2805
+ "history": {
2806
+ "a.csv": "2023-06-05T03:54:07.000000Z",
2807
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2808
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2809
+ },
2810
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2811
+ }
2812
+ },
2813
+ {
2814
+ "data": {
2815
+ "col1": "val11b",
2816
+ "col2": "val12b",
2817
+ "col3": "val13b",
2818
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2819
+ "_ab_source_file_url": "b.csv",
2820
+ },
2821
+ "stream": "stream1",
2822
+ },
2823
+ {
2824
+ "data": {
2825
+ "col1": "val21b",
2826
+ "col2": "val22b",
2827
+ "col3": "val23b",
2828
+ "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
2829
+ "_ab_source_file_url": "b.csv",
2830
+ },
2831
+ "stream": "stream1",
2832
+ },
2833
+ {
2834
+ "stream1": {
2835
+ "history": {
2836
+ "b.csv": "2023-06-06T03:54:07.000000Z",
2837
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2838
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2839
+ },
2840
+ "_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
2841
+ }
2842
+ },
2843
+ ]
2844
+ )
2845
+ .set_incremental_scenario_config(
2846
+ IncrementalScenarioConfig(
2847
+ input_state=StateBuilder().with_stream_state(
2848
+ "stream1",
2849
+ {
2850
+ "history": {
2851
+ "old_file.csv": "2023-06-05T00:00:00.000000Z",
2852
+ "c.csv": "2023-06-07T03:54:07.000000Z",
2853
+ "d.csv": "2023-06-08T03:54:07.000000Z",
2854
+ },
2855
+ "_ab_source_file_last_modified": "2023-06-04T00:00:00.000000Z_very_old_file.csv",
2856
+ }
2857
+ ).build(),
2858
+ )
2859
+ )
2860
+ ).build()