airbyte-cdk 0.40.2__py3-none-any.whl → 0.40.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/message_grouper.py +6 -1
- airbyte_cdk/connector_builder/models.py +1 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +80 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.3.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.3.dist-info}/RECORD +11 -9
- unit_tests/connector_builder/test_connector_builder_handler.py +3 -0
- unit_tests/connector_builder/test_message_grouper.py +8 -6
- unit_tests/utils/test_datetime_format_inferrer.py +53 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.3.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
|
14
14
|
from airbyte_cdk.sources import AbstractSource
|
15
15
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
16
16
|
from airbyte_cdk.utils import AirbyteTracedException
|
17
|
+
from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
|
17
18
|
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
|
18
19
|
from airbyte_protocol.models.airbyte_protocol import (
|
19
20
|
AirbyteControlMessage,
|
@@ -46,6 +47,7 @@ class MessageGrouper:
|
|
46
47
|
if record_limit is not None and not (1 <= record_limit <= 1000):
|
47
48
|
raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")
|
48
49
|
schema_inferrer = SchemaInferrer()
|
50
|
+
datetime_format_inferrer = DatetimeFormatInferrer()
|
49
51
|
|
50
52
|
if record_limit is None:
|
51
53
|
record_limit = self._max_record_limit
|
@@ -58,6 +60,7 @@ class MessageGrouper:
|
|
58
60
|
for message_group in self._get_message_groups(
|
59
61
|
self._read_stream(source, config, configured_catalog),
|
60
62
|
schema_inferrer,
|
63
|
+
datetime_format_inferrer,
|
61
64
|
record_limit,
|
62
65
|
):
|
63
66
|
if isinstance(message_group, AirbyteLogMessage):
|
@@ -80,10 +83,11 @@ class MessageGrouper:
|
|
80
83
|
configured_catalog.streams[0].stream.name
|
81
84
|
), # The connector builder currently only supports reading from a single stream at a time
|
82
85
|
latest_config_update=latest_config_update.connectorConfig.config if latest_config_update else self._clean_config(config),
|
86
|
+
inferred_datetime_formats=datetime_format_inferrer.get_inferred_datetime_formats(),
|
83
87
|
)
|
84
88
|
|
85
89
|
def _get_message_groups(
|
86
|
-
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
|
90
|
+
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, datetime_format_inferrer: DatetimeFormatInferrer, limit: int
|
87
91
|
) -> Iterable[Union[StreamReadPages, AirbyteControlMessage, AirbyteLogMessage, AirbyteTraceMessage]]:
|
88
92
|
"""
|
89
93
|
Message groups are partitioned according to when request log messages are received. Subsequent response log messages
|
@@ -141,6 +145,7 @@ class MessageGrouper:
|
|
141
145
|
current_page_records.append(message.record.data)
|
142
146
|
records_count += 1
|
143
147
|
schema_inferrer.accumulate(message.record)
|
148
|
+
datetime_format_inferrer.accumulate(message.record)
|
144
149
|
elif message.type == MessageType.CONTROL and message.control.type == OrchestratorType.CONNECTOR_CONFIG:
|
145
150
|
yield message.control
|
146
151
|
else:
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Any, Dict, Union
|
6
|
+
|
7
|
+
from airbyte_cdk.models import AirbyteRecordMessage
|
8
|
+
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
|
9
|
+
|
10
|
+
|
11
|
+
class DatetimeFormatInferrer:
    """
    Detects top-level record fields that might hold datetime values, along with the format they use.

    The first accumulated record seeds the set of candidate fields; every following record can only
    remove candidates (a field stays a candidate only while each record's value matches the format
    inferred from the first record). Fields that first appear in later records are never added.
    """

    def __init__(self) -> None:
        self._parser = DatetimeParser()
        # None means "no record accumulated yet"; a dict (possibly empty) means the first record was seen.
        self._datetime_candidates: Union[None, Dict[str, str]] = None
        # Formats are tried in this order and the first match wins.
        self._formats = [
            "%Y-%m-%d",
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d %H:%M:%S.%f+00:00",
            "%Y-%m-%dT%H:%M:%S.%f%z",
            "%s",
            "%d/%m/%Y %H:%M",
            "%Y-%m",
            "%d-%m-%Y",
            "%Y-%m-%dT%H:%M:%SZ",
        ]
        # Numeric values are only considered timestamps inside this half-open range.
        self._timestamp_heuristic_range = range(1_000_000_000, 2_000_000_000)

    def _can_be_datetime(self, value: Any) -> bool:
        """
        Cheap pre-check run before any format parsing is attempted.

        Returns True for non-decimal strings, and for decimal strings or integers whose numeric value
        lies in the timestamp heuristic range. Everything else (other types, out-of-range numbers) is
        rejected. Kept separate from the format check for performance reasons.
        """
        if isinstance(value, str):
            return not value.isdecimal() or int(value) in self._timestamp_heuristic_range
        if isinstance(value, int):
            return value in self._timestamp_heuristic_range
        return False

    def _matches_format(self, value: Any, format: str) -> bool:
        """
        Returns True if the parser accepts the value for the given format, False otherwise.

        Besides ValueError, OverflowError and OSError are treated as "no match": numeric-looking strings
        that are not pure decimals (e.g. "-99999999999999999999") pass `_can_be_datetime`, and a
        timestamp conversion of such a value fails with one of these instead of ValueError.
        NOTE(review): presumably the parser handles "%s" via datetime.fromtimestamp - confirm.
        """
        try:
            self._parser.parse(value, format)
        except (ValueError, OverflowError, OSError):
            return False
        return True

    def _initialize(self, record: AirbyteRecordMessage):
        """Seeds the candidates from the first record: each eligible field is mapped to the first format it matches"""
        self._datetime_candidates = {}
        for field_name, field_value in record.data.items():
            if not self._can_be_datetime(field_value):
                continue
            for candidate_format in self._formats:
                if self._matches_format(field_value, candidate_format):
                    self._datetime_candidates[field_name] = candidate_format
                    break

    def _validate(self, record: AirbyteRecordMessage):
        """Drops every candidate whose value in this record is missing or does not match the inferred format"""
        # Iterate over a snapshot because candidates are removed from the dict while looping.
        for field_name, expected_format in list(self._datetime_candidates.items()):
            current_value = record.data.get(field_name)
            # A missing field yields None, which _can_be_datetime rejects.
            if not (self._can_be_datetime(current_value) and self._matches_format(current_value, expected_format)):
                del self._datetime_candidates[field_name]

    def accumulate(self, record: AirbyteRecordMessage):
        """Analyzes the record and updates the internal state of candidate datetime fields"""
        if self._datetime_candidates is None:
            self._initialize(record)
        else:
            self._validate(record)

    def get_inferred_datetime_formats(self) -> Dict[str, str]:
        """
        Returns the candidate datetime fields as a dict - the keys are the field names and the values are the inferred datetime formats.
        For these fields the format was consistent across all visited records. An empty dict is returned if no record was accumulated.
        """
        return self._datetime_candidates or {}
|
@@ -8,8 +8,8 @@ airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
9
9
|
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=q8mqQjNqpvHZgwVbNuvSe19o4Aw6MQTuhA2URmdz0K0,5443
|
10
10
|
airbyte_cdk/connector_builder/main.py,sha256=jn2gqaYAvd6uDoFe0oVhnY23grm5sL-jfIX6kGvhVxk,2994
|
11
|
-
airbyte_cdk/connector_builder/message_grouper.py,sha256=
|
12
|
-
airbyte_cdk/connector_builder/models.py,sha256=
|
11
|
+
airbyte_cdk/connector_builder/message_grouper.py,sha256=yEjvwdXgzYK29xwjl88-4s-J49iaud8_aOrAlOkAzsg,12504
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=jL2SJIWJTLCbBqobw5Qo8WGS0aN-K9TRmfSpDHM5vYc,1277
|
13
13
|
airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
|
14
14
|
airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
|
15
15
|
airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
|
@@ -156,6 +156,7 @@ airbyte_cdk/sources/utils/schema_models.py,sha256=m1vOqNkkVYGblc492wKo11Zm5FK9F0
|
|
156
156
|
airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyuYF875bSQg,9493
|
157
157
|
airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
|
158
158
|
airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
|
159
|
+
airbyte_cdk/utils/datetime_format_inferrer.py,sha256=1z5lGq_DI9LFrT68ftlJSqndS6i-Rs1PX7T_RBtOJpA,3443
|
159
160
|
airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
|
160
161
|
airbyte_cdk/utils/schema_inferrer.py,sha256=j0us_mEMj8PVVzSZfoS1adK7V7a--mSHQozo6xmsiIc,3720
|
161
162
|
airbyte_cdk/utils/stream_status_utils.py,sha256=X1Vy7BhglycjdIWpfKDfwJussNCxYffelKt6Utjx-qY,1005
|
@@ -163,8 +164,8 @@ airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnT
|
|
163
164
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
164
165
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
165
166
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
166
|
-
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
|
167
|
-
unit_tests/connector_builder/test_message_grouper.py,sha256=
|
167
|
+
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=UtGSzZshZeWZcc5lt3Kt6-8aDFFwj2sLvzjCBfPkrkg,27054
|
168
|
+
unit_tests/connector_builder/test_message_grouper.py,sha256=Rek2qmuexLtfsQmHEUR_7FH-eDg3CnFiOOWVUgB9ow8,28802
|
168
169
|
unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
|
169
170
|
unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
170
171
|
unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
|
@@ -258,12 +259,13 @@ unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4
|
|
258
259
|
unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
259
260
|
unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=_BZVsG_LZUXfBmHWTlKIw65eGkdwFSiKRlpjsccj61U,12396
|
260
261
|
unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
262
|
+
unit_tests/utils/test_datetime_format_inferrer.py,sha256=Io2o5flTre9gyI_IDDMpzxOjCz3sr16LO0GRqOD59uk,2946
|
261
263
|
unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
|
262
264
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
263
265
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
264
266
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
265
|
-
airbyte_cdk-0.40.
|
266
|
-
airbyte_cdk-0.40.
|
267
|
-
airbyte_cdk-0.40.
|
268
|
-
airbyte_cdk-0.40.
|
269
|
-
airbyte_cdk-0.40.
|
267
|
+
airbyte_cdk-0.40.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
268
|
+
airbyte_cdk-0.40.3.dist-info/METADATA,sha256=pAfHdGCbN9Iz4q4xcnO3z3sATNNzWz4h7KX5eUQGq1I,8902
|
269
|
+
airbyte_cdk-0.40.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
270
|
+
airbyte_cdk-0.40.3.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
271
|
+
airbyte_cdk-0.40.3.dist-info/RECORD,,
|
@@ -354,6 +354,7 @@ def test_read():
|
|
354
354
|
],
|
355
355
|
test_read_limit_reached=False,
|
356
356
|
inferred_schema=None,
|
357
|
+
inferred_datetime_formats=None,
|
357
358
|
latest_config_update={}
|
358
359
|
)
|
359
360
|
|
@@ -368,6 +369,7 @@ def test_read():
|
|
368
369
|
],
|
369
370
|
"test_read_limit_reached": False,
|
370
371
|
"inferred_schema": None,
|
372
|
+
"inferred_datetime_formats": None,
|
371
373
|
"latest_config_update": {}
|
372
374
|
},
|
373
375
|
emitted_at=1,
|
@@ -410,6 +412,7 @@ def test_read_returns_error_response(mock_from_exception):
|
|
410
412
|
slice_descriptor=None, state=None)],
|
411
413
|
test_read_limit_reached=False,
|
412
414
|
inferred_schema=None,
|
415
|
+
inferred_datetime_formats={},
|
413
416
|
latest_config_update={})
|
414
417
|
|
415
418
|
expected_message = AirbyteMessage(
|
@@ -94,7 +94,8 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
94
94
|
"body": {"custom": "field"},
|
95
95
|
}
|
96
96
|
response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"}
|
97
|
-
expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"}
|
97
|
+
expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}, "date": {"type": "string"}}, "type": "object"}
|
98
|
+
expected_datetime_fields = {"date":"%Y-%m-%d"}
|
98
99
|
expected_pages = [
|
99
100
|
StreamReadPages(
|
100
101
|
request=HttpRequest(
|
@@ -105,7 +106,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
105
106
|
http_method="GET",
|
106
107
|
),
|
107
108
|
response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
|
108
|
-
records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}],
|
109
|
+
records=[{"name": "Shinobu Kocho", "date": "2023-03-03"}, {"name": "Muichiro Tokito", "date": "2023-03-04"}],
|
109
110
|
),
|
110
111
|
StreamReadPages(
|
111
112
|
request=HttpRequest(
|
@@ -116,7 +117,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
116
117
|
http_method="GET",
|
117
118
|
),
|
118
119
|
response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
|
119
|
-
records=[{"name": "Mitsuri Kanroji"}],
|
120
|
+
records=[{"name": "Mitsuri Kanroji", "date": "2023-03-05"}],
|
120
121
|
),
|
121
122
|
]
|
122
123
|
|
@@ -124,11 +125,11 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
124
125
|
[
|
125
126
|
request_log_message(request),
|
126
127
|
response_log_message(response),
|
127
|
-
record_message("hashiras", {"name": "Shinobu Kocho"}),
|
128
|
-
record_message("hashiras", {"name": "Muichiro Tokito"}),
|
128
|
+
record_message("hashiras", {"name": "Shinobu Kocho", "date": "2023-03-03"}),
|
129
|
+
record_message("hashiras", {"name": "Muichiro Tokito", "date": "2023-03-04"}),
|
129
130
|
request_log_message(request),
|
130
131
|
response_log_message(response),
|
131
|
-
record_message("hashiras", {"name": "Mitsuri Kanroji"}),
|
132
|
+
record_message("hashiras", {"name": "Mitsuri Kanroji", "date": "2023-03-05"}),
|
132
133
|
]
|
133
134
|
))
|
134
135
|
|
@@ -138,6 +139,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
138
139
|
)
|
139
140
|
|
140
141
|
assert actual_response.inferred_schema == expected_schema
|
142
|
+
assert actual_response.inferred_datetime_formats == expected_datetime_fields
|
141
143
|
|
142
144
|
single_slice = actual_response.slices[0]
|
143
145
|
for i, actual_page in enumerate(single_slice.pages):
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Dict, List
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage
|
9
|
+
from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
|
10
|
+
|
11
|
+
NOW = 1234567
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.mark.parametrize(
    "test_name,input_records,expected_candidate_fields",
    [
        # Single-record detection and rejection
        ("empty", [], {}),
        ("simple_match", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("timestamp_match_integer", [{"d": 1686058051}], {"d": "%s"}),
        ("timestamp_match_string", [{"d": "1686058051"}], {"d": "%s"}),
        ("timestamp_no_match_integer", [{"d": 99}], {}),
        ("timestamp_no_match_string", [{"d": "99999999999999999999"}], {}),
        ("simple_no_match", [{"d": "20220203"}], {}),
        ("multiple_match", [{"d": "2022-02-03", "e": "2022-02-03"}], {"d": "%Y-%m-%d", "e": "%Y-%m-%d"}),
        (
            "multiple_no_match",
            [{"d": "20220203", "r": "ccc", "e": {"something-else": "2023-03-03"}, "s": ["2023-03-03"], "x": False, "y": 123}],
            {},
        ),
        # One case per supported format (the "%s" format is covered by the timestamp cases above)
        ("format_1", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("format_2", [{"d": "2022-02-03 12:34:56"}], {"d": "%Y-%m-%d %H:%M:%S"}),
        ("format_3", [{"d": "2022-02-03 12:34:56.123456+00:00"}], {"d": "%Y-%m-%d %H:%M:%S.%f+00:00"}),
        ("format_4", [{"d": "2022-02-03T12:34:56.123456+0000"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_4 millis Z", [{"d": "2022-02-03T12:34:56.000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_4 micros Z", [{"d": "2022-02-03T12:34:56.000000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_6", [{"d": "03/02/2022 12:34"}], {"d": "%d/%m/%Y %H:%M"}),
        ("format_7", [{"d": "2022-02"}], {"d": "%Y-%m"}),
        ("format_8", [{"d": "03-02-2022"}], {"d": "%d-%m-%Y"}),
        ("format_9", [{"d": "2022-02-03T12:34:56Z"}], {"d": "%Y-%m-%dT%H:%M:%SZ"}),
        # Later records can only shrink the candidate set
        ("limit_down", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "2022-02-03", "x": "another thing"}], {"d": "%Y-%m-%d"}),
        ("limit_down all", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "also another thing", "x": "another thing"}], {}),
        ("limit_down empty", [{"d": "2022-02-03", "x": "2022-02-03"}, {}], {}),
        ("limit_down unsupported type", [{"d": "2022-02-03"}, {"d": False}], {}),
        ("limit_down complex type", [{"d": "2022-02-03"}, {"d": {"date": "2022-03-03"}}], {}),
        ("limit_down different format timestamp", [{"d": "2022-02-03"}, {"d": 1686058051}], {}),
        ("limit_down different format iso", [{"d": "2022-02-03"}, {"d": "2022-02-03T12:34:56.000000Z"}], {}),
        # Fields missing from the first record are never added afterwards
        ("no scope expand", [{}, {"d": "2022-02-03"}], {}),
    ],
)
def test_schema_inferrer(test_name, input_records: List, expected_candidate_fields: Dict[str, str]):
    """
    Feeds the input records to a fresh DatetimeFormatInferrer, in order, and checks the inferred formats.

    `test_name` is only a label for the case; it is not used by the test body.
    """
    inferrer = DatetimeFormatInferrer()
    for record in input_records:
        inferrer.accumulate(AirbyteRecordMessage(stream="abc", data=record, emitted_at=NOW))
    assert inferrer.get_inferred_datetime_formats() == expected_candidate_fields
|
File without changes
|
File without changes
|
File without changes
|