airbyte-cdk 0.40.1__py3-none-any.whl → 0.40.3__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/connector_builder/message_grouper.py +6 -1
- airbyte_cdk/connector_builder/models.py +1 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +4 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +4 -0
- airbyte_cdk/utils/datetime_format_inferrer.py +80 -0
- {airbyte_cdk-0.40.1.dist-info → airbyte_cdk-0.40.3.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.40.1.dist-info → airbyte_cdk-0.40.3.dist-info}/RECORD +13 -11
- unit_tests/connector_builder/test_connector_builder_handler.py +3 -0
- unit_tests/connector_builder/test_message_grouper.py +8 -6
- unit_tests/utils/test_datetime_format_inferrer.py +53 -0
- {airbyte_cdk-0.40.1.dist-info → airbyte_cdk-0.40.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.40.1.dist-info → airbyte_cdk-0.40.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.40.1.dist-info → airbyte_cdk-0.40.3.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
|
14
14
|
from airbyte_cdk.sources import AbstractSource
|
15
15
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
16
16
|
from airbyte_cdk.utils import AirbyteTracedException
|
17
|
+
from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
|
17
18
|
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
|
18
19
|
from airbyte_protocol.models.airbyte_protocol import (
|
19
20
|
AirbyteControlMessage,
|
@@ -46,6 +47,7 @@ class MessageGrouper:
|
|
46
47
|
if record_limit is not None and not (1 <= record_limit <= 1000):
|
47
48
|
raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")
|
48
49
|
schema_inferrer = SchemaInferrer()
|
50
|
+
datetime_format_inferrer = DatetimeFormatInferrer()
|
49
51
|
|
50
52
|
if record_limit is None:
|
51
53
|
record_limit = self._max_record_limit
|
@@ -58,6 +60,7 @@ class MessageGrouper:
|
|
58
60
|
for message_group in self._get_message_groups(
|
59
61
|
self._read_stream(source, config, configured_catalog),
|
60
62
|
schema_inferrer,
|
63
|
+
datetime_format_inferrer,
|
61
64
|
record_limit,
|
62
65
|
):
|
63
66
|
if isinstance(message_group, AirbyteLogMessage):
|
@@ -80,10 +83,11 @@ class MessageGrouper:
|
|
80
83
|
configured_catalog.streams[0].stream.name
|
81
84
|
), # The connector builder currently only supports reading from a single stream at a time
|
82
85
|
latest_config_update=latest_config_update.connectorConfig.config if latest_config_update else self._clean_config(config),
|
86
|
+
inferred_datetime_formats=datetime_format_inferrer.get_inferred_datetime_formats(),
|
83
87
|
)
|
84
88
|
|
85
89
|
def _get_message_groups(
|
86
|
-
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
|
90
|
+
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, datetime_format_inferrer: DatetimeFormatInferrer, limit: int
|
87
91
|
) -> Iterable[Union[StreamReadPages, AirbyteControlMessage, AirbyteLogMessage, AirbyteTraceMessage]]:
|
88
92
|
"""
|
89
93
|
Message groups are partitioned according to when request log messages are received. Subsequent response log messages
|
@@ -141,6 +145,7 @@ class MessageGrouper:
|
|
141
145
|
current_page_records.append(message.record.data)
|
142
146
|
records_count += 1
|
143
147
|
schema_inferrer.accumulate(message.record)
|
148
|
+
datetime_format_inferrer.accumulate(message.record)
|
144
149
|
elif message.type == MessageType.CONTROL and message.control.type == OrchestratorType.CONNECTOR_CONFIG:
|
145
150
|
yield message.control
|
146
151
|
else:
|
@@ -27,6 +27,10 @@ properties:
|
|
27
27
|
type: object
|
28
28
|
spec:
|
29
29
|
"$ref": "#/definitions/Spec"
|
30
|
+
metadata:
|
31
|
+
type: object
|
32
|
+
description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.
|
33
|
+
additionalProperties: true
|
30
34
|
additionalProperties: false
|
31
35
|
definitions:
|
32
36
|
AddedFieldDefinition:
|
@@ -1095,6 +1095,10 @@ class DeclarativeSource(BaseModel):
|
|
1095
1095
|
schemas: Optional[Schemas] = None
|
1096
1096
|
definitions: Optional[Dict[str, Any]] = None
|
1097
1097
|
spec: Optional[Spec] = None
|
1098
|
+
metadata: Optional[Dict[str, Any]] = Field(
|
1099
|
+
None,
|
1100
|
+
description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
|
1101
|
+
)
|
1098
1102
|
|
1099
1103
|
|
1100
1104
|
class DeclarativeStream(BaseModel):
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Any, Dict, Union
|
6
|
+
|
7
|
+
from airbyte_cdk.models import AirbyteRecordMessage
|
8
|
+
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
|
9
|
+
|
10
|
+
|
11
|
+
class DatetimeFormatInferrer:
    """
    Detects top-level record fields that might hold datetime values, along with the format they use.

    The first record passed to `accumulate` establishes the candidate fields: each field whose value can be
    a datetime is mapped to the first entry of `_formats` that parses it. Every later record can only narrow
    that set - a candidate is dropped as soon as its value is missing, cannot be a datetime anymore, or no
    longer matches the format inferred for it. Fields that first show up in later records are never added.
    """

    def __init__(self):
        self._parser = DatetimeParser()
        # None until the first record has been seen; afterwards maps field name -> inferred format
        self._datetime_candidates: Union[None, Dict[str, str]] = None
        # Tried in order, the first format that parses a value wins
        self._formats = [
            "%Y-%m-%d",
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d %H:%M:%S.%f+00:00",
            "%Y-%m-%dT%H:%M:%S.%f%z",
            "%s",
            "%d/%m/%Y %H:%M",
            "%Y-%m",
            "%d-%m-%Y",
            "%Y-%m-%dT%H:%M:%SZ",
        ]
        # Only numbers in this range are considered to be epoch timestamps (in seconds)
        self._timestamp_heuristic_range = range(1_000_000_000, 2_000_000_000)

    def _can_be_datetime(self, value: Any) -> bool:
        """
        Cheap pre-check deciding whether it is worth trying to parse the value as a datetime.

        Integers, and strings that `int()` can read as an integer, qualify only if they fall into the
        timestamp heuristic range. Any other string qualifies and is left to the format check. All other
        types never qualify. This is separate from the format check for performance reasons.
        """
        if isinstance(value, str):
            try:
                numeric_value = int(value)
            except ValueError:
                # Not a plain number, so only the format check can tell
                return True
            # int() also accepts signs, surrounding whitespace and "_" separators. Applying the range check to
            # everything int() can read keeps values like "-5" or "1_000" from being reported as "%s" timestamps.
            return numeric_value in self._timestamp_heuristic_range
        if isinstance(value, int):
            return value in self._timestamp_heuristic_range
        return False

    def _matches_format(self, value: Any, format: str) -> bool:
        """Checks if the value can be parsed using the given format"""
        try:
            self._parser.parse(value, format)
        except (ValueError, OverflowError, OSError):
            # ValueError signals a format mismatch. OverflowError and OSError are what the standard library raises for
            # epoch values outside of the platform range - presumably reachable through the "%s" format (TODO confirm
            # against DatetimeParser). Either way the value is not a match and must not abort the inference.
            return False
        return True

    def _initialize(self, record: "AirbyteRecordMessage"):
        """Builds the initial set of candidate fields from the first record"""
        self._datetime_candidates = {}
        for field_name, field_value in record.data.items():
            if not self._can_be_datetime(field_value):
                continue
            for candidate_format in self._formats:
                if self._matches_format(field_value, candidate_format):
                    self._datetime_candidates[field_name] = candidate_format
                    break

    def _validate(self, record: "AirbyteRecordMessage"):
        """Drops every candidate field the record is not consistent with"""
        # Iterate over a snapshot as candidates are removed from the dict while looping
        for candidate_field_name, candidate_field_format in list(self._datetime_candidates.items()):
            current_value = record.data.get(candidate_field_name, None)
            if (
                current_value is None
                or not self._can_be_datetime(current_value)
                or not self._matches_format(current_value, candidate_field_format)
            ):
                self._datetime_candidates.pop(candidate_field_name)

    def accumulate(self, record: "AirbyteRecordMessage"):
        """Analyzes the record and updates the internal state of candidate datetime fields"""
        if self._datetime_candidates is None:
            self._initialize(record)
        else:
            self._validate(record)

    def get_inferred_datetime_formats(self) -> Dict[str, str]:
        """
        Returns the candidate datetime fields as a mapping - the keys are the field names and the values are the inferred
        datetime formats. For these fields the format was consistent across all visited records.

        The mapping is empty if no record has been visited yet. A copy is returned, so changing it does not affect the
        state of the inferrer.
        """
        return dict(self._datetime_candidates) if self._datetime_candidates else {}
|
@@ -8,8 +8,8 @@ airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
9
9
|
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=q8mqQjNqpvHZgwVbNuvSe19o4Aw6MQTuhA2URmdz0K0,5443
|
10
10
|
airbyte_cdk/connector_builder/main.py,sha256=jn2gqaYAvd6uDoFe0oVhnY23grm5sL-jfIX6kGvhVxk,2994
|
11
|
-
airbyte_cdk/connector_builder/message_grouper.py,sha256=
|
12
|
-
airbyte_cdk/connector_builder/models.py,sha256=
|
11
|
+
airbyte_cdk/connector_builder/message_grouper.py,sha256=yEjvwdXgzYK29xwjl88-4s-J49iaud8_aOrAlOkAzsg,12504
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=jL2SJIWJTLCbBqobw5Qo8WGS0aN-K9TRmfSpDHM5vYc,1277
|
13
13
|
airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
|
14
14
|
airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
|
15
15
|
airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
|
@@ -22,7 +22,7 @@ airbyte_cdk/sources/connector_state_manager.py,sha256=_R-2QnMGimKL0t5aV4f6P1dgd-
|
|
22
22
|
airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
|
23
23
|
airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
24
24
|
airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
|
25
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256
|
25
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=-Kt09XCMs61gEphShtPTMGrqVAamr4cml03_YjDuTLQ,74196
|
26
26
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
|
27
27
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=0iZSpypxt8bhO3Lmf3BpGRTO7Fp0Q2GI8m8xyJJUjeM,6580
|
28
28
|
airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
|
@@ -60,7 +60,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
|
|
60
60
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
|
61
61
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
|
62
62
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
|
63
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
63
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=7XeAhmGHuNRYK97KwxvbrNXS1Az95O7gOMM3uRlGjrU,50104
|
64
64
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
65
65
|
airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
|
66
66
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
|
@@ -156,6 +156,7 @@ airbyte_cdk/sources/utils/schema_models.py,sha256=m1vOqNkkVYGblc492wKo11Zm5FK9F0
|
|
156
156
|
airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyuYF875bSQg,9493
|
157
157
|
airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
|
158
158
|
airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
|
159
|
+
airbyte_cdk/utils/datetime_format_inferrer.py,sha256=1z5lGq_DI9LFrT68ftlJSqndS6i-Rs1PX7T_RBtOJpA,3443
|
159
160
|
airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
|
160
161
|
airbyte_cdk/utils/schema_inferrer.py,sha256=j0us_mEMj8PVVzSZfoS1adK7V7a--mSHQozo6xmsiIc,3720
|
161
162
|
airbyte_cdk/utils/stream_status_utils.py,sha256=X1Vy7BhglycjdIWpfKDfwJussNCxYffelKt6Utjx-qY,1005
|
@@ -163,8 +164,8 @@ airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnT
|
|
163
164
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
164
165
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
165
166
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
166
|
-
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
|
167
|
-
unit_tests/connector_builder/test_message_grouper.py,sha256=
|
167
|
+
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=UtGSzZshZeWZcc5lt3Kt6-8aDFFwj2sLvzjCBfPkrkg,27054
|
168
|
+
unit_tests/connector_builder/test_message_grouper.py,sha256=Rek2qmuexLtfsQmHEUR_7FH-eDg3CnFiOOWVUgB9ow8,28802
|
168
169
|
unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
|
169
170
|
unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
170
171
|
unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
|
@@ -258,12 +259,13 @@ unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4
|
|
258
259
|
unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
259
260
|
unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=_BZVsG_LZUXfBmHWTlKIw65eGkdwFSiKRlpjsccj61U,12396
|
260
261
|
unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
262
|
+
unit_tests/utils/test_datetime_format_inferrer.py,sha256=Io2o5flTre9gyI_IDDMpzxOjCz3sr16LO0GRqOD59uk,2946
|
261
263
|
unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
|
262
264
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
263
265
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
264
266
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
265
|
-
airbyte_cdk-0.40.
|
266
|
-
airbyte_cdk-0.40.
|
267
|
-
airbyte_cdk-0.40.
|
268
|
-
airbyte_cdk-0.40.
|
269
|
-
airbyte_cdk-0.40.
|
267
|
+
airbyte_cdk-0.40.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
268
|
+
airbyte_cdk-0.40.3.dist-info/METADATA,sha256=pAfHdGCbN9Iz4q4xcnO3z3sATNNzWz4h7KX5eUQGq1I,8902
|
269
|
+
airbyte_cdk-0.40.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
270
|
+
airbyte_cdk-0.40.3.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
271
|
+
airbyte_cdk-0.40.3.dist-info/RECORD,,
|
@@ -354,6 +354,7 @@ def test_read():
|
|
354
354
|
],
|
355
355
|
test_read_limit_reached=False,
|
356
356
|
inferred_schema=None,
|
357
|
+
inferred_datetime_formats=None,
|
357
358
|
latest_config_update={}
|
358
359
|
)
|
359
360
|
|
@@ -368,6 +369,7 @@ def test_read():
|
|
368
369
|
],
|
369
370
|
"test_read_limit_reached": False,
|
370
371
|
"inferred_schema": None,
|
372
|
+
"inferred_datetime_formats": None,
|
371
373
|
"latest_config_update": {}
|
372
374
|
},
|
373
375
|
emitted_at=1,
|
@@ -410,6 +412,7 @@ def test_read_returns_error_response(mock_from_exception):
|
|
410
412
|
slice_descriptor=None, state=None)],
|
411
413
|
test_read_limit_reached=False,
|
412
414
|
inferred_schema=None,
|
415
|
+
inferred_datetime_formats={},
|
413
416
|
latest_config_update={})
|
414
417
|
|
415
418
|
expected_message = AirbyteMessage(
|
@@ -94,7 +94,8 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
94
94
|
"body": {"custom": "field"},
|
95
95
|
}
|
96
96
|
response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"}
|
97
|
-
expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"}
|
97
|
+
expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}, "date": {"type": "string"}}, "type": "object"}
|
98
|
+
expected_datetime_fields = {"date":"%Y-%m-%d"}
|
98
99
|
expected_pages = [
|
99
100
|
StreamReadPages(
|
100
101
|
request=HttpRequest(
|
@@ -105,7 +106,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
105
106
|
http_method="GET",
|
106
107
|
),
|
107
108
|
response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
|
108
|
-
records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}],
|
109
|
+
records=[{"name": "Shinobu Kocho", "date": "2023-03-03"}, {"name": "Muichiro Tokito", "date": "2023-03-04"}],
|
109
110
|
),
|
110
111
|
StreamReadPages(
|
111
112
|
request=HttpRequest(
|
@@ -116,7 +117,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
116
117
|
http_method="GET",
|
117
118
|
),
|
118
119
|
response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
|
119
|
-
records=[{"name": "Mitsuri Kanroji"}],
|
120
|
+
records=[{"name": "Mitsuri Kanroji", "date": "2023-03-05"}],
|
120
121
|
),
|
121
122
|
]
|
122
123
|
|
@@ -124,11 +125,11 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
124
125
|
[
|
125
126
|
request_log_message(request),
|
126
127
|
response_log_message(response),
|
127
|
-
record_message("hashiras", {"name": "Shinobu Kocho"}),
|
128
|
-
record_message("hashiras", {"name": "Muichiro Tokito"}),
|
128
|
+
record_message("hashiras", {"name": "Shinobu Kocho", "date": "2023-03-03"}),
|
129
|
+
record_message("hashiras", {"name": "Muichiro Tokito", "date": "2023-03-04"}),
|
129
130
|
request_log_message(request),
|
130
131
|
response_log_message(response),
|
131
|
-
record_message("hashiras", {"name": "Mitsuri Kanroji"}),
|
132
|
+
record_message("hashiras", {"name": "Mitsuri Kanroji", "date": "2023-03-05"}),
|
132
133
|
]
|
133
134
|
))
|
134
135
|
|
@@ -138,6 +139,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
138
139
|
)
|
139
140
|
|
140
141
|
assert actual_response.inferred_schema == expected_schema
|
142
|
+
assert actual_response.inferred_datetime_formats == expected_datetime_fields
|
141
143
|
|
142
144
|
single_slice = actual_response.slices[0]
|
143
145
|
for i, actual_page in enumerate(single_slice.pages):
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Dict, List
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage
|
9
|
+
from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
|
10
|
+
|
11
|
+
NOW = 1234567
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.mark.parametrize(
    "test_name,input_records,expected_candidate_fields",
    [
        ("empty", [], {}),
        ("simple_match", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("timestamp_match_integer", [{"d": 1686058051}], {"d": "%s"}),
        ("timestamp_match_string", [{"d": "1686058051"}], {"d": "%s"}),
        ("timestamp_no_match_integer", [{"d": 99}], {}),
        ("timestamp_no_match_string", [{"d": "99999999999999999999"}], {}),
        ("simple_no_match", [{"d": "20220203"}], {}),
        ("multiple_match", [{"d": "2022-02-03", "e": "2022-02-03"}], {"d": "%Y-%m-%d", "e": "%Y-%m-%d"}),
        (
            "multiple_no_match",
            [{"d": "20220203", "r": "ccc", "e": {"something-else": "2023-03-03"}, "s": ["2023-03-03"], "x": False, "y": 123}],
            {},
        ),
        ("format_1", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("format_2", [{"d": "2022-02-03 12:34:56"}], {"d": "%Y-%m-%d %H:%M:%S"}),
        ("format_3", [{"d": "2022-02-03 12:34:56.123456+00:00"}], {"d": "%Y-%m-%d %H:%M:%S.%f+00:00"}),
        ("format_4", [{"d": "2022-02-03T12:34:56.123456+0000"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_4 milliseconds with Z", [{"d": "2022-02-03T12:34:56.000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_4 microseconds with Z", [{"d": "2022-02-03T12:34:56.000000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_6", [{"d": "03/02/2022 12:34"}], {"d": "%d/%m/%Y %H:%M"}),
        ("format_7", [{"d": "2022-02"}], {"d": "%Y-%m"}),
        ("format_8", [{"d": "03-02-2022"}], {"d": "%d-%m-%Y"}),
        ("format_9", [{"d": "2022-02-03T12:34:56Z"}], {"d": "%Y-%m-%dT%H:%M:%SZ"}),
        ("limit_down", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "2022-02-03", "x": "another thing"}], {"d": "%Y-%m-%d"}),
        ("limit_down all", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "also another thing", "x": "another thing"}], {}),
        ("limit_down empty", [{"d": "2022-02-03", "x": "2022-02-03"}, {}], {}),
        ("limit_down unsupported type", [{"d": "2022-02-03"}, {"d": False}], {}),
        ("limit_down complex type", [{"d": "2022-02-03"}, {"d": {"date": "2022-03-03"}}], {}),
        ("limit_down different format timestamp", [{"d": "2022-02-03"}, {"d": 1686058051}], {}),
        ("limit_down different format string", [{"d": "2022-02-03"}, {"d": "2022-02-03T12:34:56.000000Z"}], {}),
        ("no scope expand", [{}, {"d": "2022-02-03"}], {}),
    ],
)
def test_schema_inferrer(test_name, input_records: List, expected_candidate_fields: Dict[str, str]):
    """
    Feeds the records to a fresh DatetimeFormatInferrer in order and checks the formats it reports afterwards.

    Despite its name this test covers the datetime format inferrer, not the schema inferrer. `test_name` is only a
    label for the case and is not used by the test body.
    """
    inferrer = DatetimeFormatInferrer()
    for record in input_records:
        inferrer.accumulate(AirbyteRecordMessage(stream="abc", data=record, emitted_at=NOW))
    assert inferrer.get_inferred_datetime_formats() == expected_candidate_fields
|
File without changes
|
File without changes
|
File without changes
|