airbyte-cdk 0.50.0__py3-none-any.whl → 0.50.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. airbyte_cdk/entrypoint.py +7 -0
  2. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3 -3
  3. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -3
  4. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +9 -9
  5. airbyte_cdk/sources/file_based/config/csv_format.py +42 -6
  6. airbyte_cdk/sources/file_based/file_based_source.py +4 -5
  7. airbyte_cdk/sources/file_based/file_types/csv_parser.py +114 -59
  8. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +2 -2
  9. airbyte_cdk/sources/file_based/stream/cursor/{file_based_cursor.py → abstract_file_based_cursor.py} +9 -1
  10. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +10 -10
  11. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -2
  12. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/RECORD +25 -24
  14. unit_tests/sources/file_based/config/test_csv_format.py +23 -0
  15. unit_tests/sources/file_based/file_types/test_csv_parser.py +50 -18
  16. unit_tests/sources/file_based/helpers.py +5 -0
  17. unit_tests/sources/file_based/in_memory_files_source.py +11 -3
  18. unit_tests/sources/file_based/scenarios/csv_scenarios.py +1254 -47
  19. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +6 -5
  20. unit_tests/sources/file_based/scenarios/scenario_builder.py +8 -7
  21. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +13 -12
  22. unit_tests/sources/file_based/test_scenarios.py +30 -0
  23. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/LICENSE.txt +0 -0
  24. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/WHEEL +0 -0
  25. {airbyte_cdk-0.50.0.dist-info → airbyte_cdk-0.50.2.dist-info}/top_level.txt +0 -0
@@ -45,45 +45,68 @@ single_csv_scenario = (
45
45
  "properties": {
46
46
  "streams": {
47
47
  "title": "The list of streams to sync",
48
- "description": 'Each instance of this configuration defines a <a href="https://docs.airbyte.com/cloud/core-concepts#stream">stream</a>. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to warehouse destination such as Snowflake or BigQuery, each stream is a separate table.',
48
+ "description": "Each instance of this configuration defines a <a href=\"https://docs.airbyte.com/cloud/core-concepts#stream\">stream</a>. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to warehouse destination such as Snowflake or BigQuery, each stream is a separate table.",
49
49
  "order": 10,
50
50
  "type": "array",
51
51
  "items": {
52
52
  "title": "FileBasedStreamConfig",
53
53
  "type": "object",
54
54
  "properties": {
55
- "name": {"title": "Name", "description": "The name of the stream.", "type": "string"},
55
+ "name": {
56
+ "title": "Name",
57
+ "description": "The name of the stream.",
58
+ "type": "string"
59
+ },
56
60
  "file_type": {
57
61
  "title": "File Type",
58
62
  "description": "The data file type that is being extracted for a stream.",
59
- "type": "string",
63
+ "type": "string"
60
64
  },
61
65
  "globs": {
62
66
  "title": "Globs",
63
- "description": 'The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href="https://en.wikipedia.org/wiki/Glob_(programming)">here</a>.',
67
+ "description": "The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href=\"https://en.wikipedia.org/wiki/Glob_(programming)\">here</a>.",
64
68
  "type": "array",
65
- "items": {"type": "string"},
69
+ "items": {
70
+ "type": "string"
71
+ }
66
72
  },
67
73
  "validation_policy": {
68
74
  "title": "Validation Policy",
69
75
  "description": "The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema.",
70
- "type": "string",
76
+ "type": "string"
71
77
  },
72
78
  "input_schema": {
73
79
  "title": "Input Schema",
74
80
  "description": "The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.",
75
- "oneOf": [{"type": "object"}, {"type": "string"}],
81
+ "oneOf": [
82
+ {
83
+ "type": "object"
84
+ },
85
+ {
86
+ "type": "string"
87
+ }
88
+ ]
76
89
  },
77
90
  "primary_key": {
78
91
  "title": "Primary Key",
79
92
  "description": "The column or columns (for a composite key) that serves as the unique identifier of a record.",
80
- "oneOf": [{"type": "string"}, {"type": "array", "items": {"type": "string"}}],
93
+ "oneOf": [
94
+ {
95
+ "type": "string"
96
+ },
97
+ {
98
+ "type": "array",
99
+ "items": {
100
+ "type": "string"
101
+ }
102
+ }
103
+ ]
81
104
  },
82
105
  "days_to_sync_if_history_is_full": {
83
106
  "title": "Days To Sync If History Is Full",
84
107
  "description": "When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
85
108
  "default": 3,
86
- "type": "integer",
109
+ "type": "integer"
87
110
  },
88
111
  "format": {
89
112
  "oneOf": [
@@ -100,16 +123,18 @@ single_csv_scenario = (
100
123
  "filetype": {
101
124
  "title": "Filetype",
102
125
  "default": "avro",
103
- "enum": ["avro"],
104
- "type": "string",
126
+ "enum": [
127
+ "avro"
128
+ ],
129
+ "type": "string"
105
130
  },
106
131
  "decimal_as_float": {
107
132
  "title": "Convert Decimal Fields to Floats",
108
133
  "description": "Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended.",
109
134
  "default": False,
110
- "type": "boolean",
111
- },
112
- },
135
+ "type": "boolean"
136
+ }
137
+ }
113
138
  },
114
139
  {
115
140
  "title": "CsvFormat",
@@ -118,37 +143,39 @@ single_csv_scenario = (
118
143
  "filetype": {
119
144
  "title": "Filetype",
120
145
  "default": "csv",
121
- "enum": ["csv"],
122
- "type": "string",
146
+ "enum": [
147
+ "csv"
148
+ ],
149
+ "type": "string"
123
150
  },
124
151
  "delimiter": {
125
152
  "title": "Delimiter",
126
153
  "description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.",
127
154
  "default": ",",
128
- "type": "string",
155
+ "type": "string"
129
156
  },
130
157
  "quote_char": {
131
158
  "title": "Quote Character",
132
159
  "description": "The character used for quoting CSV values. To disallow quoting, make this field blank.",
133
- "default": '"',
134
- "type": "string",
160
+ "default": "\"",
161
+ "type": "string"
135
162
  },
136
163
  "escape_char": {
137
164
  "title": "Escape Character",
138
165
  "description": "The character used for escaping special characters. To disallow escaping, leave this field blank.",
139
- "type": "string",
166
+ "type": "string"
140
167
  },
141
168
  "encoding": {
142
169
  "title": "Encoding",
143
- "description": 'The character encoding of the CSV data. Leave blank to default to <strong>UTF8</strong>. See <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">list of python encodings</a> for allowable options.',
170
+ "description": "The character encoding of the CSV data. Leave blank to default to <strong>UTF8</strong>. See <a href=\"https://docs.python.org/3/library/codecs.html#standard-encodings\" target=\"_blank\">list of python encodings</a> for allowable options.",
144
171
  "default": "utf8",
145
- "type": "string",
172
+ "type": "string"
146
173
  },
147
174
  "double_quote": {
148
175
  "title": "Double Quote",
149
176
  "description": "Whether two quotes in a quoted CSV value denote a single quote in the data.",
150
177
  "default": True,
151
- "type": "boolean",
178
+ "type": "boolean"
152
179
  },
153
180
  "quoting_behavior": {
154
181
  "title": "Quoting Behavior",
@@ -158,10 +185,72 @@ single_csv_scenario = (
158
185
  "Quote All",
159
186
  "Quote Special Characters",
160
187
  "Quote Non-numeric",
161
- "Quote None",
188
+ "Quote None"
189
+ ]
190
+ },
191
+ "null_values": {
192
+ "title": "Null Values",
193
+ "description": "A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.",
194
+ "default": [],
195
+ "type": "array",
196
+ "items": {
197
+ "type": "string"
198
+ },
199
+ "uniqueItems": True
200
+ },
201
+ "skip_rows_before_header": {
202
+ "title": "Skip Rows Before Header",
203
+ "description": "The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field.",
204
+ "default": 0,
205
+ "type": "integer"
206
+ },
207
+ "skip_rows_after_header": {
208
+ "title": "Skip Rows After Header",
209
+ "description": "The number of rows to skip after the header row.",
210
+ "default": 0,
211
+ "type": "integer"
212
+ },
213
+ "autogenerate_column_names": {
214
+ "title": "Autogenerate Column Names",
215
+ "description": "Whether to autogenerate column names if column_names is empty. If true, column names will be of the form \u201cf0\u201d, \u201cf1\u201d\u2026 If false, column names will be read from the first CSV row after skip_rows_before_header.",
216
+ "default": False,
217
+ "type": "boolean"
218
+ },
219
+ "true_values": {
220
+ "title": "True Values",
221
+ "description": "A set of case-sensitive strings that should be interpreted as true values.",
222
+ "default": [
223
+ "y",
224
+ "yes",
225
+ "t",
226
+ "true",
227
+ "on",
228
+ "1"
162
229
  ],
230
+ "type": "array",
231
+ "items": {
232
+ "type": "string"
233
+ },
234
+ "uniqueItems": True
163
235
  },
164
- },
236
+ "false_values": {
237
+ "title": "False Values",
238
+ "description": "A set of case-sensitive strings that should be interpreted as false values.",
239
+ "default": [
240
+ "n",
241
+ "no",
242
+ "f",
243
+ "false",
244
+ "off",
245
+ "0"
246
+ ],
247
+ "type": "array",
248
+ "items": {
249
+ "type": "string"
250
+ },
251
+ "uniqueItems": True
252
+ }
253
+ }
165
254
  },
166
255
  {
167
256
  "title": "JsonlFormat",
@@ -170,10 +259,12 @@ single_csv_scenario = (
170
259
  "filetype": {
171
260
  "title": "Filetype",
172
261
  "default": "jsonl",
173
- "enum": ["jsonl"],
174
- "type": "string",
262
+ "enum": [
263
+ "jsonl"
264
+ ],
265
+ "type": "string"
175
266
  }
176
- },
267
+ }
177
268
  },
178
269
  {
179
270
  "title": "ParquetFormat",
@@ -182,50 +273,67 @@ single_csv_scenario = (
182
273
  "filetype": {
183
274
  "title": "Filetype",
184
275
  "default": "parquet",
185
- "enum": ["parquet"],
186
- "type": "string",
276
+ "enum": [
277
+ "parquet"
278
+ ],
279
+ "type": "string"
187
280
  },
188
281
  "decimal_as_float": {
189
282
  "title": "Convert Decimal Fields to Floats",
190
283
  "description": "Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended.",
191
284
  "default": False,
192
- "type": "boolean",
193
- },
194
- },
195
- },
285
+ "type": "boolean"
286
+ }
287
+ }
288
+ }
196
289
  ]
197
- },
290
+ }
198
291
  },
199
292
  {
200
293
  "title": "Legacy Format",
201
- "required": ["filetype"],
294
+ "required": [
295
+ "filetype"
296
+ ],
202
297
  "type": "object",
203
- "properties": {"filetype": {"title": "Filetype", "type": "string"}},
204
- },
298
+ "properties": {
299
+ "filetype": {
300
+ "title": "Filetype",
301
+ "type": "string"
302
+ }
303
+ }
304
+ }
205
305
  ]
206
306
  },
207
307
  "schemaless": {
208
308
  "title": "Schemaless",
209
309
  "description": "When enabled, syncs will not validate or structure records against the stream's schema.",
210
310
  "default": False,
211
- "type": "boolean",
212
- },
311
+ "type": "boolean"
312
+ }
213
313
  },
214
- "required": ["name", "file_type", "validation_policy"],
215
- },
314
+ "required": [
315
+ "name",
316
+ "file_type",
317
+ "validation_policy"
318
+ ]
319
+ }
216
320
  },
217
321
  "start_date": {
218
322
  "title": "Start Date",
219
323
  "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.",
220
- "examples": ["2021-01-01T00:00:00Z"],
324
+ "examples": [
325
+ "2021-01-01T00:00:00Z"
326
+ ],
221
327
  "format": "date-time",
222
328
  "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$",
223
329
  "order": 1,
224
- "type": "string",
225
- },
330
+ "type": "string"
331
+ }
226
332
  },
227
- "required": ["streams"],
228
- },
333
+ "required": [
334
+ "streams"
335
+ ]
336
+ }
229
337
  }
230
338
  )
231
339
  .set_expected_catalog(
@@ -1531,3 +1639,1102 @@ schemaless_with_user_input_schema_fails_connection_check_multi_stream_scenario =
1531
1639
  .set_expected_discover_error(ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value)
1532
1640
  .set_expected_read_error(ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value)
1533
1641
  ).build()
1642
+
1643
+ csv_string_can_be_null_with_input_schemas_scenario = (
1644
+ TestScenarioBuilder()
1645
+ .set_name("csv_string_can_be_null_with_input_schema")
1646
+ .set_config(
1647
+ {
1648
+ "streams": [
1649
+ {
1650
+ "name": "stream1",
1651
+ "file_type": "csv",
1652
+ "globs": ["*"],
1653
+ "validation_policy": "emit_record",
1654
+ "input_schema": {"col1": "string", "col2": "string"},
1655
+ "format": {
1656
+ "csv": {
1657
+ "filetype": "csv",
1658
+ "null_values": ["null"],
1659
+ }
1660
+ }
1661
+ }
1662
+ ],
1663
+ "start_date": "2023-06-04T03:54:07Z"
1664
+ }
1665
+ )
1666
+ .set_files(
1667
+ {
1668
+ "a.csv": {
1669
+ "contents": [
1670
+ ("col1", "col2"),
1671
+ ("2", "null"),
1672
+ ],
1673
+ "last_modified": "2023-06-05T03:54:07.000000Z",
1674
+ }
1675
+ }
1676
+ )
1677
+ .set_file_type("csv")
1678
+ .set_expected_catalog(
1679
+ {
1680
+ "streams": [
1681
+ {
1682
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1683
+ "json_schema": {
1684
+ "type": "object",
1685
+ "properties": {
1686
+ "col1": {
1687
+ "type": "string"
1688
+ },
1689
+ "col2": {
1690
+ "type": "string"
1691
+ },
1692
+ "_ab_source_file_last_modified": {
1693
+ "type": "string"
1694
+ },
1695
+ "_ab_source_file_url": {
1696
+ "type": "string"
1697
+ },
1698
+ },
1699
+ },
1700
+ "name": "stream1",
1701
+ "source_defined_cursor": True,
1702
+ "supported_sync_modes": ["full_refresh", "incremental"],
1703
+ }
1704
+ ]
1705
+ }
1706
+ )
1707
+ .set_expected_records(
1708
+ [
1709
+ {"data": {"col1": "2", "col2": None, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1710
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
1711
+ ]
1712
+ )
1713
+ ).build()
1714
+
1715
+ csv_string_not_null_if_no_null_values_scenario = (
1716
+ TestScenarioBuilder()
1717
+ .set_name("csv_string_not_null_if_no_null_values")
1718
+ .set_config(
1719
+ {
1720
+ "streams": [
1721
+ {
1722
+ "name": "stream1",
1723
+ "file_type": "csv",
1724
+ "globs": ["*"],
1725
+ "validation_policy": "emit_record",
1726
+ "format": {
1727
+ "csv": {
1728
+ "filetype": "csv",
1729
+ }
1730
+ }
1731
+ }
1732
+ ],
1733
+ "start_date": "2023-06-04T03:54:07Z"
1734
+ }
1735
+ )
1736
+ .set_files(
1737
+ {
1738
+ "a.csv": {
1739
+ "contents": [
1740
+ ("col1", "col2"),
1741
+ ("2", "null"),
1742
+ ],
1743
+ "last_modified": "2023-06-05T03:54:07.000Z",
1744
+ }
1745
+ }
1746
+ )
1747
+ .set_file_type("csv")
1748
+ .set_expected_catalog(
1749
+ {
1750
+ "streams": [
1751
+ {
1752
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1753
+ "json_schema": {
1754
+ "type": "object",
1755
+ "properties": {
1756
+ "col1": {
1757
+ "type": ["null", "string"]
1758
+ },
1759
+ "col2": {
1760
+ "type": ["null", "string"]
1761
+ },
1762
+ "_ab_source_file_last_modified": {
1763
+ "type": "string"
1764
+ },
1765
+ "_ab_source_file_url": {
1766
+ "type": "string"
1767
+ },
1768
+ },
1769
+ },
1770
+ "name": "stream1",
1771
+ "source_defined_cursor": True,
1772
+ "supported_sync_modes": ["full_refresh", "incremental"],
1773
+ }
1774
+ ]
1775
+ }
1776
+ )
1777
+ .set_expected_records(
1778
+ [
1779
+ {"data": {"col1": "2", "col2": "null", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1780
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
1781
+ ]
1782
+ )
1783
+ ).build()
1784
+
1785
+ csv_strings_can_be_null_not_quoted_scenario = (
1786
+ TestScenarioBuilder()
1787
+ .set_name("csv_strings_can_be_null_no_input_schema")
1788
+ .set_config(
1789
+ {
1790
+ "streams": [
1791
+ {
1792
+ "name": "stream1",
1793
+ "file_type": "csv",
1794
+ "globs": ["*"],
1795
+ "validation_policy": "emit_record",
1796
+ "format": {
1797
+ "csv": {
1798
+ "filetype": "csv",
1799
+ "null_values": ["null"]
1800
+ }
1801
+ }
1802
+ }
1803
+ ],
1804
+ "start_date": "2023-06-04T03:54:07Z"
1805
+ }
1806
+ )
1807
+ .set_files(
1808
+ {
1809
+ "a.csv": {
1810
+ "contents": [
1811
+ ("col1", "col2"),
1812
+ ("2", "null"),
1813
+ ],
1814
+ "last_modified": "2023-06-05T03:54:07.000Z",
1815
+ }
1816
+ }
1817
+ )
1818
+ .set_file_type("csv")
1819
+ .set_expected_catalog(
1820
+ {
1821
+ "streams": [
1822
+ {
1823
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1824
+ "json_schema": {
1825
+ "type": "object",
1826
+ "properties": {
1827
+ "col1": {
1828
+ "type": ["null", "string"]
1829
+ },
1830
+ "col2": {
1831
+ "type": ["null", "string"]
1832
+ },
1833
+ "_ab_source_file_last_modified": {
1834
+ "type": "string"
1835
+ },
1836
+ "_ab_source_file_url": {
1837
+ "type": "string"
1838
+ },
1839
+ },
1840
+ },
1841
+ "name": "stream1",
1842
+ "source_defined_cursor": True,
1843
+ "supported_sync_modes": ["full_refresh", "incremental"],
1844
+ }
1845
+ ]
1846
+ }
1847
+ )
1848
+ .set_expected_records(
1849
+ [
1850
+ {"data": {"col1": "2", "col2": None, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1851
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
1852
+ ]
1853
+ )
1854
+ ).build()
1855
+
1856
+ csv_newline_in_values_quoted_value_scenario = (
1857
+ TestScenarioBuilder()
1858
+ .set_name("csv_newline_in_values_quoted_value")
1859
+ .set_config(
1860
+ {
1861
+ "streams": [
1862
+ {
1863
+ "name": "stream1",
1864
+ "file_type": "csv",
1865
+ "globs": ["*"],
1866
+ "validation_policy": "emit_record",
1867
+ "format": {
1868
+ "csv": {
1869
+ "filetype": "csv",
1870
+ "quoting_behavior": "Quote All"
1871
+ }
1872
+ }
1873
+ }
1874
+ ],
1875
+ "start_date": "2023-06-04T03:54:07Z"
1876
+ }
1877
+ )
1878
+ .set_files(
1879
+ {
1880
+ "a.csv": {
1881
+ "contents": [
1882
+ '''"col1","col2"''',
1883
+ '''"2","val\n2"''',
1884
+ ],
1885
+ "last_modified": "2023-06-05T03:54:07.000Z",
1886
+ }
1887
+ }
1888
+ )
1889
+ .set_file_type("csv")
1890
+ .set_expected_catalog(
1891
+ {
1892
+ "streams": [
1893
+ {
1894
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1895
+ "json_schema": {
1896
+ "type": "object",
1897
+ "properties": {
1898
+ "col1": {
1899
+ "type": ["null", "string"]
1900
+ },
1901
+ "col2": {
1902
+ "type": ["null", "string"]
1903
+ },
1904
+ "_ab_source_file_last_modified": {
1905
+ "type": "string"
1906
+ },
1907
+ "_ab_source_file_url": {
1908
+ "type": "string"
1909
+ },
1910
+ },
1911
+ },
1912
+ "name": "stream1",
1913
+ "source_defined_cursor": True,
1914
+ "supported_sync_modes": ["full_refresh", "incremental"],
1915
+ }
1916
+ ]
1917
+ }
1918
+ )
1919
+ .set_expected_records(
1920
+ [
1921
+ {"data": {"col1": "2", "col2": 'val\n2', "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1922
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
1923
+ ]
1924
+ )
1925
+ ).build()
1926
+
1927
+ csv_newline_in_values_not_quoted_scenario = (
1928
+ TestScenarioBuilder()
1929
+ .set_name("csv_newline_in_values_not_quoted")
1930
+ .set_config(
1931
+ {
1932
+ "streams": [
1933
+ {
1934
+ "name": "stream1",
1935
+ "file_type": "csv",
1936
+ "globs": ["*"],
1937
+ "validation_policy": "emit_record",
1938
+ "format": {
1939
+ "csv": {
1940
+ "filetype": "csv",
1941
+ }
1942
+ }
1943
+ }
1944
+ ],
1945
+ "start_date": "2023-06-04T03:54:07Z"
1946
+ }
1947
+ )
1948
+ .set_files(
1949
+ {
1950
+ "a.csv": {
1951
+ "contents": [
1952
+ '''col1,col2''',
1953
+ '''2,val\n2''',
1954
+ ],
1955
+ "last_modified": "2023-06-05T03:54:07.000Z",
1956
+ }
1957
+ }
1958
+ )
1959
+ .set_file_type("csv")
1960
+ .set_expected_catalog(
1961
+ {
1962
+ "streams": [
1963
+ {
1964
+ "default_cursor_field": ["_ab_source_file_last_modified"],
1965
+ "json_schema": {
1966
+ "type": "object",
1967
+ "properties": {
1968
+ "col1": {
1969
+ "type": ["null", "string"]
1970
+ },
1971
+ "col2": {
1972
+ "type": ["null", "string"]
1973
+ },
1974
+ "_ab_source_file_last_modified": {
1975
+ "type": "string"
1976
+ },
1977
+ "_ab_source_file_url": {
1978
+ "type": "string"
1979
+ },
1980
+ },
1981
+ },
1982
+ "name": "stream1",
1983
+ "source_defined_cursor": True,
1984
+ "supported_sync_modes": ["full_refresh", "incremental"],
1985
+ }
1986
+ ]
1987
+ }
1988
+ )
1989
+ .set_expected_records(
1990
+ [
1991
+ # Note that the value for col2 is truncated to "val" because the newline is not inside a quoted value, so it terminates the record
1992
+ {"data": {"col1": "2", "col2": 'val', "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
1993
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
1994
+ ]
1995
+ )
1996
+ .set_expected_logs({"read": [
1997
+ {
1998
+ "level": "ERROR",
1999
+ "message": "Error parsing record. This could be due to a mismatch between the config's file type and the actual file type, or because the file or record is not parseable. stream=stream1 file=a.csv line_no=2 n_skipped=0",
2000
+ }
2001
+ ]})
2002
+ ).build()
2003
+
2004
+ csv_escape_char_is_set_scenario = (
2005
+ TestScenarioBuilder()
2006
+ .set_name("csv_escape_char_is_set")
2007
+ .set_config(
2008
+ {
2009
+ "streams": [
2010
+ {
2011
+ "name": "stream1",
2012
+ "file_type": "csv",
2013
+ "globs": ["*"],
2014
+ "validation_policy": "emit_record",
2015
+ "format": {
2016
+ "csv": {
2017
+ "filetype": "csv",
2018
+ "double_quotes": False,
2019
+ "quote_char": '"',
2020
+ "delimiter": ",",
2021
+ "escape_char": "\\",
2022
+ "quoting_behavior": "Quote All",
2023
+
2024
+ }
2025
+ }
2026
+ }
2027
+ ],
2028
+ "start_date": "2023-06-04T03:54:07Z"
2029
+ }
2030
+ )
2031
+ .set_files(
2032
+ {
2033
+ "a.csv": {
2034
+ "contents": [
2035
+ '''col1,col2''',
2036
+ '''val11,"val\\"2"''',
2037
+ ],
2038
+ "last_modified": "2023-06-05T03:54:07.000Z",
2039
+ }
2040
+ }
2041
+ )
2042
+ .set_file_type("csv")
2043
+ .set_expected_catalog(
2044
+ {
2045
+ "streams": [
2046
+ {
2047
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2048
+ "json_schema": {
2049
+ "type": "object",
2050
+ "properties": {
2051
+ "col1": {
2052
+ "type": ["null", "string"]
2053
+ },
2054
+ "col2": {
2055
+ "type": ["null", "string"]
2056
+ },
2057
+ "_ab_source_file_last_modified": {
2058
+ "type": "string"
2059
+ },
2060
+ "_ab_source_file_url": {
2061
+ "type": "string"
2062
+ },
2063
+ },
2064
+ },
2065
+ "name": "stream1",
2066
+ "source_defined_cursor": True,
2067
+ "supported_sync_modes": ["full_refresh", "incremental"],
2068
+ }
2069
+ ]
2070
+ }
2071
+ )
2072
+ .set_expected_records(
2073
+ [
2074
+ {"data": {"col1": 'val11', "col2": 'val"2', "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2075
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
2076
+ ]
2077
+ )
2078
+ ).build()
2079
+
2080
+ csv_double_quote_is_set_scenario = (
2081
+ TestScenarioBuilder()
2082
+ .set_name("csv_doublequote_is_set")
2083
+ # This scenario tests that two consecutive quote characters inside a quoted value are read as a single quote character when double quoting is enabled
2084
+ .set_config(
2085
+ {
2086
+ "streams": [
2087
+ {
2088
+ "name": "stream1",
2089
+ "file_type": "csv",
2090
+ "globs": ["*"],
2091
+ "validation_policy": "emit_record",
2092
+ "format": {
2093
+ "csv": {
2094
+ "filetype": "csv",
2095
+ "double_quotes": True,
2096
+ "quote_char": '"',
2097
+ "delimiter": ",",
2098
+ "quoting_behavior": "Quote All",
2099
+
2100
+ }
2101
+ }
2102
+ }
2103
+ ],
2104
+ "start_date": "2023-06-04T03:54:07Z"
2105
+ }
2106
+ )
2107
+ .set_files(
2108
+ {
2109
+ "a.csv": {
2110
+ "contents": [
2111
+ '''col1,col2''',
2112
+ '''val11,"val""2"''',
2113
+ ],
2114
+ "last_modified": "2023-06-05T03:54:07.000Z",
2115
+ }
2116
+ }
2117
+ )
2118
+ .set_file_type("csv")
2119
+ .set_expected_catalog(
2120
+ {
2121
+ "streams": [
2122
+ {
2123
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2124
+ "json_schema": {
2125
+ "type": "object",
2126
+ "properties": {
2127
+ "col1": {
2128
+ "type": ["null", "string"]
2129
+ },
2130
+ "col2": {
2131
+ "type": ["null", "string"]
2132
+ },
2133
+ "_ab_source_file_last_modified": {
2134
+ "type": "string"
2135
+ },
2136
+ "_ab_source_file_url": {
2137
+ "type": "string"
2138
+ },
2139
+ },
2140
+ },
2141
+ "name": "stream1",
2142
+ "source_defined_cursor": True,
2143
+ "supported_sync_modes": ["full_refresh", "incremental"],
2144
+ }
2145
+ ]
2146
+ }
2147
+ )
2148
+ .set_expected_records(
2149
+ [
2150
+ {"data": {"col1": 'val11', "col2": 'val"2', "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2151
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
2152
+ ]
2153
+ )
2154
+ ).build()
2155
+
2156
+ csv_custom_delimiter_with_escape_char_scenario = (
2157
+ TestScenarioBuilder()
2158
+ .set_name("csv_custom_delimiter_with_escape_char")
2159
+ # This scenario tests that a value can contain the delimiter if the delimiter is preceded by the escape_char
2160
+ .set_config(
2161
+ {
2162
+ "streams": [
2163
+ {
2164
+ "name": "stream1",
2165
+ "file_type": "csv",
2166
+ "globs": ["*"],
2167
+ "validation_policy": "emit_record",
2168
+ "format": {
2169
+ "csv": {
2170
+ "filetype": "csv",
2171
+ "double_quotes": True,
2172
+ "quote_char": '@',
2173
+ "delimiter": "|",
2174
+ "escape_char": "+"
2175
+ }
2176
+ }
2177
+ }
2178
+ ],
2179
+ "start_date": "2023-06-04T03:54:07Z"
2180
+ }
2181
+ )
2182
+ .set_files(
2183
+ {
2184
+ "a.csv": {
2185
+ "contents": [
2186
+ '''col1|col2''',
2187
+ '''val"1,1|val+|2''',
2188
+ ],
2189
+ "last_modified": "2023-06-05T03:54:07.000Z",
2190
+ }
2191
+ }
2192
+ )
2193
+ .set_file_type("csv")
2194
+ .set_expected_catalog(
2195
+ {
2196
+ "streams": [
2197
+ {
2198
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2199
+ "json_schema": {
2200
+ "type": "object",
2201
+ "properties": {
2202
+ "col1": {
2203
+ "type": ["null", "string"]
2204
+ },
2205
+ "col2": {
2206
+ "type": ["null", "string"]
2207
+ },
2208
+ "_ab_source_file_last_modified": {
2209
+ "type": "string"
2210
+ },
2211
+ "_ab_source_file_url": {
2212
+ "type": "string"
2213
+ },
2214
+ },
2215
+ },
2216
+ "name": "stream1",
2217
+ "source_defined_cursor": True,
2218
+ "supported_sync_modes": ["full_refresh", "incremental"],
2219
+ }
2220
+ ]
2221
+ }
2222
+ )
2223
+ .set_expected_records(
2224
+ [
2225
+ {"data": {"col1": 'val"1,1', "col2": 'val|2', "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2226
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
2227
+ ]
2228
+ )
2229
+ ).build()
2230
+
2231
+ csv_custom_delimiter_in_double_quotes_scenario = (
2232
+ TestScenarioBuilder()
2233
+ .set_name("csv_custom_delimiter_in_double_quotes")
2234
+ # This scenario tests that a value can contain the delimiter if it is wrapped in the quote_char
2235
+ .set_config(
2236
+ {
2237
+ "streams": [
2238
+ {
2239
+ "name": "stream1",
2240
+ "file_type": "csv",
2241
+ "globs": ["*"],
2242
+ "validation_policy": "emit_record",
2243
+ "format": {
2244
+ "csv": {
2245
+ "filetype": "csv",
2246
+ "double_quotes": True,
2247
+ "quote_char": '@',
2248
+ "delimiter": "|",
2249
+ }
2250
+ }
2251
+ }
2252
+ ],
2253
+ "start_date": "2023-06-04T03:54:07Z"
2254
+ }
2255
+ )
2256
+ .set_files(
2257
+ {
2258
+ "a.csv": {
2259
+ "contents": [
2260
+ '''col1|col2''',
2261
+ '''val"1,1|@val|2@''',
2262
+ ],
2263
+ "last_modified": "2023-06-05T03:54:07.000Z",
2264
+ }
2265
+ }
2266
+ )
2267
+ .set_file_type("csv")
2268
+ .set_expected_catalog(
2269
+ {
2270
+ "streams": [
2271
+ {
2272
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2273
+ "json_schema": {
2274
+ "type": "object",
2275
+ "properties": {
2276
+ "col1": {
2277
+ "type": ["null", "string"]
2278
+ },
2279
+ "col2": {
2280
+ "type": ["null", "string"]
2281
+ },
2282
+ "_ab_source_file_last_modified": {
2283
+ "type": "string"
2284
+ },
2285
+ "_ab_source_file_url": {
2286
+ "type": "string"
2287
+ },
2288
+ },
2289
+ },
2290
+ "name": "stream1",
2291
+ "source_defined_cursor": True,
2292
+ "supported_sync_modes": ["full_refresh", "incremental"],
2293
+ }
2294
+ ]
2295
+ }
2296
+ )
2297
+ .set_expected_records(
2298
+ [
2299
+ {"data": {"col1": 'val"1,1', "col2": 'val|2', "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2300
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
2301
+ ]
2302
+ )
2303
+ ).build()
2304
+
2305
+
2306
+ csv_skip_before_header_scenario = (
2307
+ TestScenarioBuilder()
2308
+ .set_name("csv_skip_before_header")
2309
+ .set_config(
2310
+ {
2311
+ "streams": [
2312
+ {
2313
+ "name": "stream1",
2314
+ "file_type": "csv",
2315
+ "globs": ["*"],
2316
+ "validation_policy": "emit_record",
2317
+ "format": {
2318
+ "csv": {
2319
+ "filetype": "csv",
2320
+ "skip_rows_before_header": 2
2321
+ }
2322
+ }
2323
+ }
2324
+ ],
2325
+ "start_date": "2023-06-04T03:54:07Z"
2326
+ }
2327
+ )
2328
+ .set_files(
2329
+ {
2330
+ "a.csv": {
2331
+ "contents": [
2332
+ ("skip_this", "skip_this"),
2333
+ ("skip_this_too", "skip_this_too"),
2334
+ ("col1", "col2"),
2335
+ ("val11", "val12"),
2336
+ ],
2337
+ "last_modified": "2023-06-05T03:54:07.000Z",
2338
+ }
2339
+ }
2340
+ )
2341
+ .set_file_type("csv")
2342
+ .set_expected_catalog(
2343
+ {
2344
+ "streams": [
2345
+ {
2346
+ "default_cursor_field": ["_ab_source_file_last_modified"],
2347
+ "json_schema": {
2348
+ "type": "object",
2349
+ "properties": {
2350
+ "col1": {
2351
+ "type": ["null", "string"]
2352
+ },
2353
+ "col2": {
2354
+ "type": ["null", "string"]
2355
+ },
2356
+ "_ab_source_file_last_modified": {
2357
+ "type": "string"
2358
+ },
2359
+ "_ab_source_file_url": {
2360
+ "type": "string"
2361
+ },
2362
+ },
2363
+ },
2364
+ "name": "stream1",
2365
+ "source_defined_cursor": True,
2366
+ "supported_sync_modes": ["full_refresh", "incremental"],
2367
+ }
2368
+ ]
2369
+ }
2370
+ )
2371
+ .set_expected_records(
2372
+ [
2373
+ {"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
2374
+ "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
2375
+ ]
2376
+ )
2377
+ ).build()
2378
+
2379
# Scenario "csv_skip_after_header": the stream's CSV format is configured with
# skip_rows_after_header=2. The fixture file contains a header row, two filler
# rows and a single data row; the expected output is that one data row.
csv_skip_after_header_scenario = (
    TestScenarioBuilder()
    .set_name("csv_skip_after_header")
    .set_config(
        {
            "streams": [
                {
                    "name": "stream1",
                    "file_type": "csv",
                    "globs": ["*"],
                    "validation_policy": "emit_record",
                    "format": {
                        "csv": {"filetype": "csv", "skip_rows_after_header": 2},
                    },
                },
            ],
            "start_date": "2023-06-04T03:54:07Z",
        }
    )
    .set_files(
        {
            "a.csv": {
                "contents": [
                    ("col1", "col2"),  # header row
                    ("skip_this", "skip_this"),  # filler row 1
                    ("skip_this_too", "skip_this_too"),  # filler row 2
                    ("val11", "val12"),  # the only row expected in the output
                ],
                "last_modified": "2023-06-05T03:54:07.000Z",
            },
        }
    )
    .set_file_type("csv")
    .set_expected_catalog(
        {
            "streams": [
                {
                    "default_cursor_field": ["_ab_source_file_last_modified"],
                    "json_schema": {
                        "type": "object",
                        "properties": {
                            "col1": {"type": ["null", "string"]},
                            "col2": {"type": ["null", "string"]},
                            "_ab_source_file_last_modified": {"type": "string"},
                            "_ab_source_file_url": {"type": "string"},
                        },
                    },
                    "name": "stream1",
                    "source_defined_cursor": True,
                    "supported_sync_modes": ["full_refresh", "incremental"],
                },
            ],
        }
    )
    .set_expected_records(
        [
            {
                "data": {
                    "col1": "val11",
                    "col2": "val12",
                    "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
                    "_ab_source_file_url": "a.csv",
                },
                "stream": "stream1",
            },
        ]
    )
    .build()
)
2451
+
2452
+
2453
# Scenario "csv_skip_before_after_header": the stream's CSV format is configured
# with skip_rows_before_header=1 and skip_rows_after_header=1. The fixture file
# has one filler row above the header, one filler row below it, then a single
# data row; the expected output is that one data row keyed by col1/col2.
csv_skip_before_and_after_header_scenario = (
    TestScenarioBuilder()
    .set_name("csv_skip_before_after_header")
    .set_config(
        {
            "streams": [
                {
                    "name": "stream1",
                    "file_type": "csv",
                    "globs": ["*"],
                    "validation_policy": "emit_record",
                    "format": {
                        "csv": {
                            "filetype": "csv",
                            "skip_rows_before_header": 1,
                            "skip_rows_after_header": 1,
                        },
                    },
                },
            ],
            "start_date": "2023-06-04T03:54:07Z",
        }
    )
    .set_files(
        {
            "a.csv": {
                "contents": [
                    ("skip_this", "skip_this"),  # filler row above the header
                    ("col1", "col2"),  # header row
                    ("skip_this_too", "skip_this_too"),  # filler row below the header
                    ("val11", "val12"),  # the only row expected in the output
                ],
                "last_modified": "2023-06-05T03:54:07.000Z",
            },
        }
    )
    .set_file_type("csv")
    .set_expected_catalog(
        {
            "streams": [
                {
                    "default_cursor_field": ["_ab_source_file_last_modified"],
                    "json_schema": {
                        "type": "object",
                        "properties": {
                            "col1": {"type": ["null", "string"]},
                            "col2": {"type": ["null", "string"]},
                            "_ab_source_file_last_modified": {"type": "string"},
                            "_ab_source_file_url": {"type": "string"},
                        },
                    },
                    "name": "stream1",
                    "source_defined_cursor": True,
                    "supported_sync_modes": ["full_refresh", "incremental"],
                },
            ],
        }
    )
    .set_expected_records(
        [
            {
                "data": {
                    "col1": "val11",
                    "col2": "val12",
                    "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
                    "_ab_source_file_url": "a.csv",
                },
                "stream": "stream1",
            },
        ]
    )
    .build()
)
2526
+
2527
# Scenario "csv_autogenerate_column_names": the stream's CSV format sets
# autogenerate_column_names=True and the fixture file has no header row, only
# one data row. The expected catalog and record use the column names "f0" and
# "f1" for the two values.
csv_autogenerate_column_names_scenario = (
    TestScenarioBuilder()
    .set_name("csv_autogenerate_column_names")
    .set_config(
        {
            "streams": [
                {
                    "name": "stream1",
                    "file_type": "csv",
                    "globs": ["*"],
                    "validation_policy": "emit_record",
                    "format": {
                        "csv": {
                            "filetype": "csv",
                            "autogenerate_column_names": True,
                        },
                    },
                },
            ],
            "start_date": "2023-06-04T03:54:07Z",
        }
    )
    .set_files(
        {
            "a.csv": {
                "contents": [
                    ("val11", "val12"),  # data only; no header row in this file
                ],
                "last_modified": "2023-06-05T03:54:07.000Z",
            },
        }
    )
    .set_file_type("csv")
    .set_expected_catalog(
        {
            "streams": [
                {
                    "default_cursor_field": ["_ab_source_file_last_modified"],
                    "json_schema": {
                        "type": "object",
                        "properties": {
                            "f0": {"type": ["null", "string"]},
                            "f1": {"type": ["null", "string"]},
                            "_ab_source_file_last_modified": {"type": "string"},
                            "_ab_source_file_url": {"type": "string"},
                        },
                    },
                    "name": "stream1",
                    "source_defined_cursor": True,
                    "supported_sync_modes": ["full_refresh", "incremental"],
                },
            ],
        }
    )
    .set_expected_records(
        [
            {
                "data": {
                    "f0": "val11",
                    "f1": "val12",
                    "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
                    "_ab_source_file_url": "a.csv",
                },
                "stream": "stream1",
            },
        ]
    )
    .build()
)
2596
+
2597
# Scenario "csv_custom_bool_values": both columns are declared "boolean" via
# input_schema, and the CSV format supplies custom true_values / false_values.
# The data row holds one custom-true and one custom-false string; the expected
# record contains True and False respectively.
csv_custom_bool_values_scenario = (
    TestScenarioBuilder()
    .set_name("csv_custom_bool_values")
    .set_config(
        {
            "streams": [
                {
                    "name": "stream1",
                    "file_type": "csv",
                    "globs": ["*"],
                    "validation_policy": "emit_record",
                    "input_schema": {"col1": "boolean", "col2": "boolean"},
                    "format": {
                        "csv": {
                            "filetype": "csv",
                            "true_values": ["this_is_true"],
                            "false_values": ["this_is_false"],
                        },
                    },
                },
            ],
            "start_date": "2023-06-04T03:54:07Z",
        }
    )
    .set_files(
        {
            "a.csv": {
                "contents": [
                    ("col1", "col2"),  # header row
                    ("this_is_true", "this_is_false"),
                ],
                "last_modified": "2023-06-05T03:54:07.000Z",
            },
        }
    )
    .set_file_type("csv")
    .set_expected_catalog(
        {
            "streams": [
                {
                    "default_cursor_field": ["_ab_source_file_last_modified"],
                    "json_schema": {
                        "type": "object",
                        "properties": {
                            "col1": {"type": "boolean"},
                            "col2": {"type": "boolean"},
                            "_ab_source_file_last_modified": {"type": "string"},
                            "_ab_source_file_url": {"type": "string"},
                        },
                    },
                    "name": "stream1",
                    "source_defined_cursor": True,
                    "supported_sync_modes": ["full_refresh", "incremental"],
                },
            ],
        }
    )
    .set_expected_records(
        [
            {
                "data": {
                    "col1": True,
                    "col2": False,
                    "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
                    "_ab_source_file_url": "a.csv",
                },
                "stream": "stream1",
            },
        ]
    )
    .build()
)
2669
+
2670
# Scenario "csv_custom_null_values": input_schema declares col1 as "boolean"
# and col2 as "string", and the CSV format lists "null" as the only null value.
# The data row is ("null", "na"); the expected record has col1 as None while
# col2 keeps the literal string "na".
csv_custom_null_values_scenario = (
    TestScenarioBuilder()
    .set_name("csv_custom_null_values")
    .set_config(
        {
            "streams": [
                {
                    "name": "stream1",
                    "file_type": "csv",
                    "globs": ["*"],
                    "validation_policy": "emit_record",
                    "input_schema": {"col1": "boolean", "col2": "string"},
                    "format": {
                        "csv": {"filetype": "csv", "null_values": ["null"]},
                    },
                },
            ],
            "start_date": "2023-06-04T03:54:07Z",
        }
    )
    .set_files(
        {
            "a.csv": {
                "contents": [
                    ("col1", "col2"),  # header row
                    ("null", "na"),
                ],
                "last_modified": "2023-06-05T03:54:07.000Z",
            },
        }
    )
    .set_file_type("csv")
    .set_expected_catalog(
        {
            "streams": [
                {
                    "default_cursor_field": ["_ab_source_file_last_modified"],
                    "json_schema": {
                        "type": "object",
                        "properties": {
                            "col1": {"type": "boolean"},
                            "col2": {"type": "string"},
                            "_ab_source_file_last_modified": {"type": "string"},
                            "_ab_source_file_url": {"type": "string"},
                        },
                    },
                    "name": "stream1",
                    "source_defined_cursor": True,
                    "supported_sync_modes": ["full_refresh", "incremental"],
                },
            ],
        }
    )
    .set_expected_records(
        [
            {
                "data": {
                    "col1": None,
                    "col2": "na",
                    "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
                    "_ab_source_file_url": "a.csv",
                },
                "stream": "stream1",
            },
        ]
    )
    .build()
)