airbyte-cdk 0.40.2__py3-none-any.whl → 0.40.4__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/config_observation.py +10 -2
- airbyte_cdk/connector_builder/message_grouper.py +7 -2
- airbyte_cdk/connector_builder/models.py +1 -0
- airbyte_cdk/entrypoint.py +34 -19
- airbyte_cdk/sources/abstract_source.py +13 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +61 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +6 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +4 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +7 -2
- airbyte_cdk/sources/message/__init__.py +7 -0
- airbyte_cdk/sources/message/repository.py +36 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +6 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +9 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +80 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/RECORD +27 -21
- unit_tests/connector_builder/test_connector_builder_handler.py +37 -3
- unit_tests/connector_builder/test_message_grouper.py +8 -18
- unit_tests/sources/declarative/auth/test_oauth.py +3 -3
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +65 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +30 -4
- unit_tests/sources/test_abstract_source.py +47 -1
- unit_tests/utils/test_datetime_format_inferrer.py +53 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/top_level.txt +0 -0
@@ -68,10 +68,18 @@ def observe_connector_config(non_observed_connector_config: MutableMapping[str,
|
|
68
68
|
|
69
69
|
|
70
70
|
def emit_configuration_as_airbyte_control_message(config: MutableMapping):
|
71
|
+
"""
|
72
|
+
WARNING: deprecated - emit_configuration_as_airbyte_control_message is being deprecated in favor of the MessageRepository mechanism.
|
73
|
+
See the airbyte_cdk.sources.message package
|
74
|
+
"""
|
75
|
+
airbyte_message = create_connector_config_control_message(config)
|
76
|
+
print(airbyte_message.json(exclude_unset=True))
|
77
|
+
|
78
|
+
|
79
|
+
def create_connector_config_control_message(config):
|
71
80
|
control_message = AirbyteControlMessage(
|
72
81
|
type=OrchestratorType.CONNECTOR_CONFIG,
|
73
82
|
emitted_at=time.time() * 1000,
|
74
83
|
connectorConfig=AirbyteControlConnectorConfigMessage(config=config),
|
75
84
|
)
|
76
|
-
|
77
|
-
print(airbyte_message.json(exclude_unset=True))
|
85
|
+
return AirbyteMessage(type=Type.CONTROL, control=control_message)
|
@@ -14,6 +14,7 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
|
14
14
|
from airbyte_cdk.sources import AbstractSource
|
15
15
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
16
16
|
from airbyte_cdk.utils import AirbyteTracedException
|
17
|
+
from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
|
17
18
|
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
|
18
19
|
from airbyte_protocol.models.airbyte_protocol import (
|
19
20
|
AirbyteControlMessage,
|
@@ -46,6 +47,7 @@ class MessageGrouper:
|
|
46
47
|
if record_limit is not None and not (1 <= record_limit <= 1000):
|
47
48
|
raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")
|
48
49
|
schema_inferrer = SchemaInferrer()
|
50
|
+
datetime_format_inferrer = DatetimeFormatInferrer()
|
49
51
|
|
50
52
|
if record_limit is None:
|
51
53
|
record_limit = self._max_record_limit
|
@@ -58,6 +60,7 @@ class MessageGrouper:
|
|
58
60
|
for message_group in self._get_message_groups(
|
59
61
|
self._read_stream(source, config, configured_catalog),
|
60
62
|
schema_inferrer,
|
63
|
+
datetime_format_inferrer,
|
61
64
|
record_limit,
|
62
65
|
):
|
63
66
|
if isinstance(message_group, AirbyteLogMessage):
|
@@ -79,11 +82,12 @@ class MessageGrouper:
|
|
79
82
|
inferred_schema=schema_inferrer.get_stream_schema(
|
80
83
|
configured_catalog.streams[0].stream.name
|
81
84
|
), # The connector builder currently only supports reading from a single stream at a time
|
82
|
-
latest_config_update=latest_config_update.connectorConfig.config if latest_config_update else None,
|
85
|
+
latest_config_update=self._clean_config(latest_config_update.connectorConfig.config) if latest_config_update else None,
|
86
|
+
inferred_datetime_formats=datetime_format_inferrer.get_inferred_datetime_formats(),
|
83
87
|
)
|
84
88
|
|
85
89
|
def _get_message_groups(
|
86
|
-
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
|
90
|
+
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, datetime_format_inferrer: DatetimeFormatInferrer, limit: int
|
87
91
|
) -> Iterable[Union[StreamReadPages, AirbyteControlMessage, AirbyteLogMessage, AirbyteTraceMessage]]:
|
88
92
|
"""
|
89
93
|
Message groups are partitioned according to when request log messages are received. Subsequent response log messages
|
@@ -141,6 +145,7 @@ class MessageGrouper:
|
|
141
145
|
current_page_records.append(message.record.data)
|
142
146
|
records_count += 1
|
143
147
|
schema_inferrer.accumulate(message.record)
|
148
|
+
datetime_format_inferrer.accumulate(message.record)
|
144
149
|
elif message.type == MessageType.CONTROL and message.control.type == OrchestratorType.CONNECTOR_CONFIG:
|
145
150
|
yield message.control
|
146
151
|
else:
|
airbyte_cdk/entrypoint.py
CHANGED
@@ -77,27 +77,32 @@ class AirbyteEntrypoint(object):
|
|
77
77
|
else:
|
78
78
|
self.logger.setLevel(logging.INFO)
|
79
79
|
|
80
|
-
# todo: add try catch for exceptions with different exit codes
|
81
80
|
source_spec: ConnectorSpecification = self.source.spec(self.logger)
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
if cmd == "check":
|
91
|
-
yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
|
92
|
-
elif cmd == "discover":
|
93
|
-
yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.discover(source_spec, config))
|
94
|
-
elif cmd == "read":
|
95
|
-
config_catalog = self.source.read_catalog(parsed_args.catalog)
|
96
|
-
state = self.source.read_state(parsed_args.state)
|
97
|
-
|
98
|
-
yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.read(source_spec, config, config_catalog, state))
|
81
|
+
try:
|
82
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
83
|
+
if cmd == "spec":
|
84
|
+
message = AirbyteMessage(type=Type.SPEC, spec=source_spec)
|
85
|
+
yield from [
|
86
|
+
self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)
|
87
|
+
]
|
88
|
+
yield self.airbyte_message_to_string(message)
|
99
89
|
else:
|
100
|
-
|
90
|
+
raw_config = self.source.read_config(parsed_args.config)
|
91
|
+
config = self.source.configure(raw_config, temp_dir)
|
92
|
+
|
93
|
+
if cmd == "check":
|
94
|
+
yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
|
95
|
+
elif cmd == "discover":
|
96
|
+
yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.discover(source_spec, config))
|
97
|
+
elif cmd == "read":
|
98
|
+
config_catalog = self.source.read_catalog(parsed_args.catalog)
|
99
|
+
state = self.source.read_state(parsed_args.state)
|
100
|
+
|
101
|
+
yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.read(source_spec, config, config_catalog, state))
|
102
|
+
else:
|
103
|
+
raise Exception("Unexpected command " + cmd)
|
104
|
+
finally:
|
105
|
+
yield from [self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)]
|
101
106
|
|
102
107
|
def check(self, source_spec: ConnectorSpecification, config: TConfig) -> Iterable[AirbyteMessage]:
|
103
108
|
self.set_up_secret_filter(config, source_spec.connectionSpecification)
|
@@ -106,6 +111,7 @@ class AirbyteEntrypoint(object):
|
|
106
111
|
except AirbyteTracedException as traced_exc:
|
107
112
|
connection_status = traced_exc.as_connection_status_message()
|
108
113
|
if connection_status:
|
114
|
+
yield from self._emit_queued_messages(self.source)
|
109
115
|
yield connection_status
|
110
116
|
return
|
111
117
|
|
@@ -115,6 +121,7 @@ class AirbyteEntrypoint(object):
|
|
115
121
|
else:
|
116
122
|
self.logger.error("Check failed")
|
117
123
|
|
124
|
+
yield from self._emit_queued_messages(self.source)
|
118
125
|
yield AirbyteMessage(type=Type.CONNECTION_STATUS, connectionStatus=check_result)
|
119
126
|
|
120
127
|
def discover(self, source_spec: ConnectorSpecification, config: TConfig) -> Iterable[AirbyteMessage]:
|
@@ -122,6 +129,8 @@ class AirbyteEntrypoint(object):
|
|
122
129
|
if self.source.check_config_against_spec:
|
123
130
|
self.validate_connection(source_spec, config)
|
124
131
|
catalog = self.source.discover(self.logger, config)
|
132
|
+
|
133
|
+
yield from self._emit_queued_messages(self.source)
|
125
134
|
yield AirbyteMessage(type=Type.CATALOG, catalog=catalog)
|
126
135
|
|
127
136
|
def read(self, source_spec: ConnectorSpecification, config: TConfig, catalog: TCatalog, state: TState) -> Iterable[AirbyteMessage]:
|
@@ -130,6 +139,7 @@ class AirbyteEntrypoint(object):
|
|
130
139
|
self.validate_connection(source_spec, config)
|
131
140
|
|
132
141
|
yield from self.source.read(self.logger, config, catalog, state)
|
142
|
+
yield from self._emit_queued_messages(self.source)
|
133
143
|
|
134
144
|
@staticmethod
|
135
145
|
def validate_connection(source_spec: ConnectorSpecification, config: Mapping[str, Any]) -> None:
|
@@ -149,6 +159,11 @@ class AirbyteEntrypoint(object):
|
|
149
159
|
def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
|
150
160
|
return airbyte_message.json(exclude_unset=True)
|
151
161
|
|
162
|
+
def _emit_queued_messages(self, source) -> Iterable[AirbyteMessage]:
|
163
|
+
if hasattr(source, "message_repository") and source.message_repository:
|
164
|
+
yield from source.message_repository.consume_queue()
|
165
|
+
return
|
166
|
+
|
152
167
|
|
153
168
|
def launch(source: Source, args: List[str]):
|
154
169
|
source_entrypoint = AirbyteEntrypoint(source)
|
@@ -22,6 +22,7 @@ from airbyte_cdk.models import (
|
|
22
22
|
)
|
23
23
|
from airbyte_cdk.models import Type as MessageType
|
24
24
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
25
|
+
from airbyte_cdk.sources.message import MessageRepository
|
25
26
|
from airbyte_cdk.sources.source import Source
|
26
27
|
from airbyte_cdk.sources.streams import Stream
|
27
28
|
from airbyte_cdk.sources.streams.core import StreamData
|
@@ -130,6 +131,7 @@ class AbstractSource(Source, ABC):
|
|
130
131
|
yield stream_status_as_airbyte_message(configured_stream, AirbyteStreamStatus.INCOMPLETE)
|
131
132
|
raise e
|
132
133
|
except Exception as e:
|
134
|
+
yield from self._emit_queued_messages()
|
133
135
|
logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
|
134
136
|
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
|
135
137
|
yield stream_status_as_airbyte_message(configured_stream, AirbyteStreamStatus.INCOMPLETE)
|
@@ -198,6 +200,7 @@ class AbstractSource(Source, ABC):
|
|
198
200
|
logger.info(f"Marking stream {stream_name} as RUNNING")
|
199
201
|
# If we just read the first record of the stream, emit the transition to the RUNNING state
|
200
202
|
yield stream_status_as_airbyte_message(configured_stream, AirbyteStreamStatus.RUNNING)
|
203
|
+
yield from self._emit_queued_messages()
|
201
204
|
yield record
|
202
205
|
|
203
206
|
logger.info(f"Read {record_counter} records from {stream_name} stream")
|
@@ -264,6 +267,7 @@ class AbstractSource(Source, ABC):
|
|
264
267
|
record_counter = 0
|
265
268
|
for message_counter, record_data_or_message in enumerate(records, start=1):
|
266
269
|
message = self._get_message(record_data_or_message, stream_instance)
|
270
|
+
yield from self._emit_queued_messages()
|
267
271
|
yield message
|
268
272
|
if message.type == MessageType.RECORD:
|
269
273
|
record = message.record
|
@@ -298,6 +302,11 @@ class AbstractSource(Source, ABC):
|
|
298
302
|
"""
|
299
303
|
return logger.isEnabledFor(logging.DEBUG)
|
300
304
|
|
305
|
+
def _emit_queued_messages(self):
|
306
|
+
if self.message_repository:
|
307
|
+
yield from self.message_repository.consume_queue()
|
308
|
+
return
|
309
|
+
|
301
310
|
def _read_full_refresh(
|
302
311
|
self,
|
303
312
|
logger: logging.Logger,
|
@@ -357,3 +366,7 @@ class AbstractSource(Source, ABC):
|
|
357
366
|
return record_data_or_message
|
358
367
|
else:
|
359
368
|
return stream_data_to_airbyte_message(stream.name, record_data_or_message, stream.transformer, stream.get_json_schema())
|
369
|
+
|
370
|
+
@property
|
371
|
+
def message_repository(self) -> Union[None, MessageRepository]:
|
372
|
+
return None
|
@@ -580,10 +580,40 @@ definitions:
|
|
580
580
|
- "{{ config['record_cursor'] }}"
|
581
581
|
datetime_format:
|
582
582
|
title: Cursor Field Datetime Format
|
583
|
-
description:
|
583
|
+
description: |
|
584
|
+
The datetime format of the Cursor Field. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
|
585
|
+
* **%s**: Epoch unix timestamp - `1686218963`
|
586
|
+
* **%a**: Weekday (abbreviated) - `Sun`
|
587
|
+
* **%A**: Weekday (full) - `Sunday`
|
588
|
+
* **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)
|
589
|
+
* **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`
|
590
|
+
* **%b**: Month (abbreviated) - `Jan`
|
591
|
+
* **%B**: Month (full) - `January`
|
592
|
+
* **%m**: Month (zero-padded) - `01`, `02`, ..., `12`
|
593
|
+
* **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`
|
594
|
+
* **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`
|
595
|
+
* **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`
|
596
|
+
* **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`
|
597
|
+
* **%p**: AM/PM indicator
|
598
|
+
* **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
|
599
|
+
* **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
|
600
|
+
* **%f**: Microsecond (zero-padded to 6 digits) - `000000`
|
601
|
+
* **%z**: UTC offset - `(empty)`, `+0000`, `-0400`
|
602
|
+
* **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
|
603
|
+
* **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
|
604
|
+
* **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`
|
605
|
+
* **%W**: Week number of the year (starting Monday) - `00`, ..., `53`
|
606
|
+
* **%c**: Date and time - `Tue Aug 16 21:30:00 1988`
|
607
|
+
* **%x**: Date standard format - `08/16/1988`
|
608
|
+
* **%X**: Time standard format - `21:30:00`
|
609
|
+
* **%%**: Literal '%' character
|
610
|
+
|
611
|
+
Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).
|
584
612
|
type: string
|
585
613
|
examples:
|
586
614
|
- "%Y-%m-%dT%H:%M:%S.%f%z"
|
615
|
+
- "%Y-%m-%d"
|
616
|
+
- "%s"
|
587
617
|
cursor_granularity:
|
588
618
|
title: Cursor Granularity
|
589
619
|
description:
|
@@ -1283,11 +1313,39 @@ definitions:
|
|
1283
1313
|
- "{{ config['start_time'] }}"
|
1284
1314
|
datetime_format:
|
1285
1315
|
title: Datetime Format
|
1286
|
-
description:
|
1316
|
+
description: |
|
1317
|
+
Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
|
1318
|
+
* **%s**: Epoch unix timestamp - `1686218963`
|
1319
|
+
* **%a**: Weekday (abbreviated) - `Sun`
|
1320
|
+
* **%A**: Weekday (full) - `Sunday`
|
1321
|
+
* **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)
|
1322
|
+
* **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`
|
1323
|
+
* **%b**: Month (abbreviated) - `Jan`
|
1324
|
+
* **%B**: Month (full) - `January`
|
1325
|
+
* **%m**: Month (zero-padded) - `01`, `02`, ..., `12`
|
1326
|
+
* **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`
|
1327
|
+
* **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`
|
1328
|
+
* **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`
|
1329
|
+
* **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`
|
1330
|
+
* **%p**: AM/PM indicator
|
1331
|
+
* **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
|
1332
|
+
* **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
|
1333
|
+
* **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`
|
1334
|
+
* **%z**: UTC offset - `(empty)`, `+0000`, `-0400`, `+1030`, `+063415`, `-030712.345216`
|
1335
|
+
* **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
|
1336
|
+
* **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
|
1337
|
+
* **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`
|
1338
|
+
* **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`
|
1339
|
+
* **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`
|
1340
|
+
* **%x**: Date representation - `08/16/1988`
|
1341
|
+
* **%X**: Time representation - `21:30:00`
|
1342
|
+
* **%%**: Literal '%' character
|
1343
|
+
|
1344
|
+
Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).
|
1287
1345
|
type: string
|
1288
1346
|
default: ""
|
1289
1347
|
examples:
|
1290
|
-
- "%Y-%m-%dT%H:%M:%S.%f%"
|
1348
|
+
- "%Y-%m-%dT%H:%M:%S.%f%z"
|
1291
1349
|
- "%Y-%m-%d"
|
1292
1350
|
- "%s"
|
1293
1351
|
max_datetime:
|
@@ -26,6 +26,7 @@ from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer impo
|
|
26
26
|
from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ManifestReferenceResolver
|
27
27
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory
|
28
28
|
from airbyte_cdk.sources.declarative.types import ConnectionDefinition
|
29
|
+
from airbyte_cdk.sources.message import MessageRepository
|
29
30
|
from airbyte_cdk.sources.streams.core import Stream
|
30
31
|
from jsonschema.exceptions import ValidationError
|
31
32
|
from jsonschema.validators import validate
|
@@ -61,6 +62,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
61
62
|
self._debug = debug
|
62
63
|
self._emit_connector_builder_messages = emit_connector_builder_messages
|
63
64
|
self._constructor = component_factory if component_factory else ModelToComponentFactory(emit_connector_builder_messages)
|
65
|
+
self._message_repository = self._constructor.get_message_repository()
|
64
66
|
|
65
67
|
self._validate_source()
|
66
68
|
|
@@ -68,6 +70,10 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
68
70
|
def resolved_manifest(self) -> Mapping[str, Any]:
|
69
71
|
return self._source_config
|
70
72
|
|
73
|
+
@property
|
74
|
+
def message_repository(self) -> Union[None, MessageRepository]:
|
75
|
+
return self._message_repository
|
76
|
+
|
71
77
|
@property
|
72
78
|
def connection_checker(self) -> ConnectionChecker:
|
73
79
|
check = self._source_config["check"]
|
@@ -453,8 +453,8 @@ class MinMaxDatetime(BaseModel):
|
|
453
453
|
)
|
454
454
|
datetime_format: Optional[str] = Field(
|
455
455
|
"",
|
456
|
-
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use %
|
457
|
-
examples=["%Y-%m-%dT%H:%M:%S.%f%", "%Y-%m-%d", "%s"],
|
456
|
+
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-0400`, `+1030`, `+063415`, `-030712.345216`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
|
457
|
+
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
|
458
458
|
title="Datetime Format",
|
459
459
|
)
|
460
460
|
max_datetime: Optional[str] = Field(
|
@@ -806,8 +806,8 @@ class DatetimeBasedCursor(BaseModel):
|
|
806
806
|
)
|
807
807
|
datetime_format: str = Field(
|
808
808
|
...,
|
809
|
-
description="The datetime format of the Cursor Field.",
|
810
|
-
examples=["%Y-%m-%dT%H:%M:%S.%f%z"],
|
809
|
+
description="The datetime format of the Cursor Field. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-0400`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
|
810
|
+
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
|
811
811
|
title="Cursor Field Datetime Format",
|
812
812
|
)
|
813
813
|
cursor_granularity: Optional[str] = Field(
|
@@ -100,6 +100,7 @@ from airbyte_cdk.sources.declarative.stream_slicers import CartesianProductStrea
|
|
100
100
|
from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields
|
101
101
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
|
102
102
|
from airbyte_cdk.sources.declarative.types import Config
|
103
|
+
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
103
104
|
from pydantic import BaseModel
|
104
105
|
|
105
106
|
ComponentDefinition: Union[Literal, Mapping, List]
|
@@ -121,6 +122,7 @@ class ModelToComponentFactory:
|
|
121
122
|
self._limit_slices_fetched = limit_slices_fetched
|
122
123
|
self._emit_connector_builder_messages = emit_connector_builder_messages
|
123
124
|
self._disable_retries = disable_retries
|
125
|
+
self._message_repository = InMemoryMessageRepository()
|
124
126
|
|
125
127
|
def _init_mappings(self):
|
126
128
|
self.PYDANTIC_MODEL_TO_CONSTRUCTOR: [Type[BaseModel], Callable] = {
|
@@ -675,8 +677,7 @@ class ModelToComponentFactory:
|
|
675
677
|
def create_no_pagination(model: NoPaginationModel, config: Config, **kwargs) -> NoPagination:
|
676
678
|
return NoPagination(parameters={})
|
677
679
|
|
678
|
-
|
679
|
-
def create_oauth_authenticator(model: OAuthAuthenticatorModel, config: Config, **kwargs) -> DeclarativeOauth2Authenticator:
|
680
|
+
def create_oauth_authenticator(self, model: OAuthAuthenticatorModel, config: Config, **kwargs) -> DeclarativeOauth2Authenticator:
|
680
681
|
if model.refresh_token_updater:
|
681
682
|
return DeclarativeSingleUseRefreshTokenOauth2Authenticator(
|
682
683
|
config,
|
@@ -693,6 +694,7 @@ class ModelToComponentFactory:
|
|
693
694
|
refresh_request_body=InterpolatedMapping(model.refresh_request_body or {}, parameters=model.parameters).eval(config),
|
694
695
|
scopes=model.scopes,
|
695
696
|
token_expiry_date_format=model.token_expiry_date_format,
|
697
|
+
message_repository=self._message_repository,
|
696
698
|
)
|
697
699
|
return DeclarativeOauth2Authenticator(
|
698
700
|
access_token_name=model.access_token_name,
|
@@ -845,3 +847,6 @@ class ModelToComponentFactory:
|
|
845
847
|
return WaitUntilTimeFromHeaderBackoffStrategy(
|
846
848
|
header=model.header, parameters=model.parameters, config=config, min_wait=model.min_wait, regex=model.regex
|
847
849
|
)
|
850
|
+
|
851
|
+
def get_message_repository(self):
|
852
|
+
return self._message_repository
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
from typing import Iterable
|
7
|
+
|
8
|
+
from airbyte_cdk.models import AirbyteMessage, Type
|
9
|
+
|
10
|
+
|
11
|
+
class MessageRepository(ABC):
|
12
|
+
@abstractmethod
|
13
|
+
def emit_message(self, message: AirbyteMessage) -> None:
|
14
|
+
raise NotImplementedError()
|
15
|
+
|
16
|
+
@abstractmethod
|
17
|
+
def consume_queue(self) -> Iterable[AirbyteMessage]:
|
18
|
+
raise NotImplementedError()
|
19
|
+
|
20
|
+
|
21
|
+
class InMemoryMessageRepository(MessageRepository):
|
22
|
+
def __init__(self):
|
23
|
+
self._message_queue = []
|
24
|
+
|
25
|
+
def emit_message(self, message: AirbyteMessage) -> None:
|
26
|
+
"""
|
27
|
+
:param message: As of today, only AirbyteControlMessages are supported given that supporting other types of message will need more
|
28
|
+
work and therefore this work has been postponed
|
29
|
+
"""
|
30
|
+
if message.type != Type.CONTROL:
|
31
|
+
raise ValueError("As of today, only AirbyteControlMessages are supported as part of the InMemoryMessageRepository")
|
32
|
+
self._message_queue.append(message)
|
33
|
+
|
34
|
+
def consume_queue(self) -> Iterable[AirbyteMessage]:
|
35
|
+
while self._message_queue:
|
36
|
+
yield self._message_queue.pop(0)
|
@@ -79,7 +79,12 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
79
79
|
)
|
80
80
|
def _get_refresh_access_token_response(self):
|
81
81
|
try:
|
82
|
-
response = requests.request(
|
82
|
+
response = requests.request(
|
83
|
+
method="POST",
|
84
|
+
url=self.get_token_refresh_endpoint(),
|
85
|
+
data=self.build_refresh_request_body(),
|
86
|
+
headers={"Content-Type": "application/json"},
|
87
|
+
)
|
83
88
|
response.raise_for_status()
|
84
89
|
return response.json()
|
85
90
|
except requests.exceptions.RequestException as e:
|
@@ -6,7 +6,8 @@ from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union
|
|
6
6
|
|
7
7
|
import dpath
|
8
8
|
import pendulum
|
9
|
-
from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
|
9
|
+
from airbyte_cdk.config_observation import create_connector_config_control_message, emit_configuration_as_airbyte_control_message
|
10
|
+
from airbyte_cdk.sources.message import MessageRepository
|
10
11
|
from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import AbstractOauth2Authenticator
|
11
12
|
|
12
13
|
|
@@ -115,6 +116,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
115
116
|
refresh_token_config_path: Sequence[str] = ("credentials", "refresh_token"),
|
116
117
|
token_expiry_date_config_path: Sequence[str] = ("credentials", "token_expiry_date"),
|
117
118
|
token_expiry_date_format: Optional[str] = None,
|
119
|
+
message_repository: MessageRepository = None,
|
118
120
|
):
|
119
121
|
"""
|
120
122
|
|
@@ -144,6 +146,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
144
146
|
self._token_expiry_date_format = token_expiry_date_format
|
145
147
|
self._refresh_token_name = refresh_token_name
|
146
148
|
self._connector_config = connector_config
|
149
|
+
self._message_repository = message_repository
|
147
150
|
super().__init__(
|
148
151
|
token_refresh_endpoint,
|
149
152
|
self.get_client_id(),
|
@@ -211,7 +214,11 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
211
214
|
self.access_token = new_access_token
|
212
215
|
self.set_refresh_token(new_refresh_token)
|
213
216
|
self.set_token_expiry_date(new_token_expiry_date)
|
214
|
-
|
217
|
+
if self._message_repository:
|
218
|
+
self._message_repository.emit_message(create_connector_config_control_message(self._connector_config))
|
219
|
+
else:
|
220
|
+
# FIXME emit_configuration_as_airbyte_control_message has been deprecated in favor of package airbyte_cdk.sources.message
|
221
|
+
emit_configuration_as_airbyte_control_message(self._connector_config)
|
215
222
|
return self.access_token
|
216
223
|
|
217
224
|
def refresh_access_token(self) -> Tuple[str, str, str]:
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Any, Dict, Union
|
6
|
+
|
7
|
+
from airbyte_cdk.models import AirbyteRecordMessage
|
8
|
+
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
|
9
|
+
|
10
|
+
|
11
|
+
class DatetimeFormatInferrer:
|
12
|
+
"""
|
13
|
+
This class is used to detect top-level fields in records that might be datetime values, along with the format used.
|
14
|
+
"""
|
15
|
+
|
16
|
+
def __init__(self):
|
17
|
+
self._parser = DatetimeParser()
|
18
|
+
self._datetime_candidates: Union[None, Dict[str, str]] = None
|
19
|
+
self._formats = [
|
20
|
+
"%Y-%m-%d",
|
21
|
+
"%Y-%m-%d %H:%M:%S",
|
22
|
+
"%Y-%m-%d %H:%M:%S.%f+00:00",
|
23
|
+
"%Y-%m-%dT%H:%M:%S.%f%z",
|
24
|
+
"%s",
|
25
|
+
"%d/%m/%Y %H:%M",
|
26
|
+
"%Y-%m",
|
27
|
+
"%d-%m-%Y",
|
28
|
+
"%Y-%m-%dT%H:%M:%SZ",
|
29
|
+
]
|
30
|
+
self._timestamp_heuristic_range = range(1_000_000_000, 2_000_000_000)
|
31
|
+
|
32
|
+
def _can_be_datetime(self, value: Any) -> bool:
|
33
|
+
"""Checks if the value can be a datetime. This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000. This is separate from the format check for performance reasons"""
|
34
|
+
if isinstance(value, str) and (not value.isdecimal() or int(value) in self._timestamp_heuristic_range):
|
35
|
+
return True
|
36
|
+
if isinstance(value, int) and value in self._timestamp_heuristic_range:
|
37
|
+
return True
|
38
|
+
return False
|
39
|
+
|
40
|
+
def _matches_format(self, value: Any, format: str) -> bool:
|
41
|
+
"""Checks if the value matches the format"""
|
42
|
+
try:
|
43
|
+
self._parser.parse(value, format)
|
44
|
+
return True
|
45
|
+
except ValueError:
|
46
|
+
return False
|
47
|
+
|
48
|
+
def _initialize(self, record: AirbyteRecordMessage):
|
49
|
+
"""Initializes the internal state of the class"""
|
50
|
+
self._datetime_candidates = {}
|
51
|
+
for field_name, field_value in record.data.items():
|
52
|
+
if not self._can_be_datetime(field_value):
|
53
|
+
continue
|
54
|
+
for format in self._formats:
|
55
|
+
if self._matches_format(field_value, format):
|
56
|
+
self._datetime_candidates[field_name] = format
|
57
|
+
break
|
58
|
+
|
59
|
+
def _validate(self, record: AirbyteRecordMessage):
|
60
|
+
"""Validates that the record is consistent with the inferred datetime formats"""
|
61
|
+
for candidate_field_name in list(self._datetime_candidates.keys()):
|
62
|
+
candidate_field_format = self._datetime_candidates[candidate_field_name]
|
63
|
+
current_value = record.data.get(candidate_field_name, None)
|
64
|
+
if (
|
65
|
+
current_value is None
|
66
|
+
or not self._can_be_datetime(current_value)
|
67
|
+
or not self._matches_format(current_value, candidate_field_format)
|
68
|
+
):
|
69
|
+
self._datetime_candidates.pop(candidate_field_name)
|
70
|
+
|
71
|
+
def accumulate(self, record: AirbyteRecordMessage):
|
72
|
+
"""Analyzes the record and updates the internal state of candidate datetime fields"""
|
73
|
+
self._initialize(record) if self._datetime_candidates is None else self._validate(record)
|
74
|
+
|
75
|
+
def get_inferred_datetime_formats(self) -> Dict[str, str]:
|
76
|
+
"""
|
77
|
+
Returns the list of candidate datetime fields - the keys are the field names and the values are the inferred datetime formats.
|
78
|
+
For these fields the format was consistent across all visited records.
|
79
|
+
"""
|
80
|
+
return self._datetime_candidates or {}
|
@@ -1,32 +1,32 @@
|
|
1
1
|
airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
|
2
|
-
airbyte_cdk/config_observation.py,sha256=
|
2
|
+
airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
|
3
3
|
airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
|
4
|
-
airbyte_cdk/entrypoint.py,sha256=
|
4
|
+
airbyte_cdk/entrypoint.py,sha256=xQ7jLhElMl-Nl1aWHnlaPbCaVv6UNFuspBUo9w7glbU,8803
|
5
5
|
airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
|
6
6
|
airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
|
7
7
|
airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
9
9
|
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=q8mqQjNqpvHZgwVbNuvSe19o4Aw6MQTuhA2URmdz0K0,5443
|
10
10
|
airbyte_cdk/connector_builder/main.py,sha256=jn2gqaYAvd6uDoFe0oVhnY23grm5sL-jfIX6kGvhVxk,2994
|
11
|
-
airbyte_cdk/connector_builder/message_grouper.py,sha256=
|
12
|
-
airbyte_cdk/connector_builder/models.py,sha256=
|
11
|
+
airbyte_cdk/connector_builder/message_grouper.py,sha256=dGU85tsOvHkAoQD2lNHA_ibqdr9MNiGlt60nOCuA6yI,12502
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=jL2SJIWJTLCbBqobw5Qo8WGS0aN-K9TRmfSpDHM5vYc,1277
|
13
13
|
airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
|
14
14
|
airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
|
15
15
|
airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
|
16
16
|
airbyte_cdk/models/airbyte_protocol.py,sha256=wKXV_4sCzmUyPndiW7HWAj_A6EDRJyk9cA88xvXGQN0,117
|
17
17
|
airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
|
18
18
|
airbyte_cdk/sources/__init__.py,sha256=4j6fLtoRCjcZnojpise4EMmQtV1RepBxoGTBgpz80JA,218
|
19
|
-
airbyte_cdk/sources/abstract_source.py,sha256=
|
19
|
+
airbyte_cdk/sources/abstract_source.py,sha256=IpHvPKhYvv36b-krP9vn1wowrfi9iZdqcxDGbl2-jVE,17743
|
20
20
|
airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
|
21
21
|
airbyte_cdk/sources/connector_state_manager.py,sha256=_R-2QnMGimKL0t5aV4f6P1dgd--TB3abY5Seg1xddXk,10469
|
22
22
|
airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
|
23
23
|
airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
24
24
|
airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
|
25
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256
|
25
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=O_U5vwYhXP19mkWhjJgRJCTHAPwf6xeOEbNDccUb_wg,78273
|
26
26
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
|
27
27
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=0iZSpypxt8bhO3Lmf3BpGRTO7Fp0Q2GI8m8xyJJUjeM,6580
|
28
28
|
airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
|
29
|
-
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=
|
29
|
+
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=lJCJAHmKPssqnDLAnxU6fuwlNAVm_Ut1EQLTxDy8h1I,10018
|
30
30
|
airbyte_cdk/sources/declarative/types.py,sha256=b_RJpL9TyAgxJIRYZx5BxpC39p-WccHKxbAqxWrn9oE,482
|
31
31
|
airbyte_cdk/sources/declarative/yaml_declarative_source.py,sha256=I9Bs9RDsFT8JNiJWRDjKYhqwvv4pqzgYZtF5hVuTDqI,1684
|
32
32
|
airbyte_cdk/sources/declarative/auth/__init__.py,sha256=DyQdO5mdKGsttWdEUqxb6WVgD7zTcvpJz-Oet_VNeBg,201
|
@@ -60,14 +60,14 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
|
|
60
60
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
|
61
61
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
|
62
62
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
|
63
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
63
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=5RI0o8lTGBt4emHSF-Xsk0FE4LccnMCTBk-LK0PdiMA,53677
|
64
64
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
65
65
|
airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
|
66
66
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
|
67
67
|
airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py,sha256=W8BcK4KOg4ifNXgsdeIoV4oneHjXBKcPHEZHIC4r-hM,3801
|
68
68
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=H23H3nURCxsvjq66Gn9naffp0HJ1fU03wLFu-5F0AhQ,7701
|
69
69
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=6ukHx0bBrCJm9rek1l_MEfS3U_gdJcM4pJRyifJEOp0,6412
|
70
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
70
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=u0dVRPddHEL6OYxFIT3Z6TC-8MiFeKKKw0gPMMpn37Y,48075
|
71
71
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=27sOWhw2LBQs62HchURakHQ2M_mtnOatNgU6q8RUtpU,476
|
72
72
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=fa6VtTwSoIkDI3SBoRtVx79opVtJX80_gU9bt31lspc,4785
|
73
73
|
airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=Fi3ocNZZoYkr0uvRgwoVSqne6enxRvi8DOHrASVK2PQ,1851
|
@@ -125,6 +125,8 @@ airbyte_cdk/sources/declarative/transformations/transformation.py,sha256=q_FDDDY
|
|
125
125
|
airbyte_cdk/sources/deprecated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
126
126
|
airbyte_cdk/sources/deprecated/base_source.py,sha256=5FafxPLDAh2KNBnKxxlC8QvPRgDYUjmT5OzqEKz8kjI,3524
|
127
127
|
airbyte_cdk/sources/deprecated/client.py,sha256=6G2xQZJ2BzMJa-Sq4VdvVM9Dwu11rEEwqHGhmXAb3h4,3560
|
128
|
+
airbyte_cdk/sources/message/__init__.py,sha256=WIXPTh8Sx18LhEV6sZ_aI5CDqfjc0b2KPBeKZKwbs6I,193
|
129
|
+
airbyte_cdk/sources/message/repository.py,sha256=VMGusWUdxtz6WGs0Lv-ut-CbVR222HdjhHvde1shg3E,1187
|
128
130
|
airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
|
129
131
|
airbyte_cdk/sources/singer/singer_helpers.py,sha256=q1LmgjFxSnN-dobMy7nikUwcK-9FvW5QQfgTqiclbAE,15649
|
130
132
|
airbyte_cdk/sources/singer/source.py,sha256=3YY8UTOXmctvMVUnYmIegmL3_IxF55iGP_bc_s2MZdY,8530
|
@@ -141,9 +143,9 @@ airbyte_cdk/sources/streams/http/auth/core.py,sha256=_s9wewvvIcOgYjhHGDj_YHApnF5
|
|
141
143
|
airbyte_cdk/sources/streams/http/auth/oauth.py,sha256=zchPWN1utNg02F93f5b4UFI5OXYo8-QhocbsXhLdG4U,4135
|
142
144
|
airbyte_cdk/sources/streams/http/auth/token.py,sha256=oU1ul0LsGsPGN_vOJOKw1xX2y_XWULRxjqXu7Rivcr8,1940
|
143
145
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
144
|
-
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=
|
146
|
+
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=CRfMunZdowlUyaAgIG76NwUo2xISTjs1AJBbJMaZ-p0,5464
|
145
147
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=T0hVF2cBXGgIfrCslvTC1uNm9rNbYjENNl2Cb3mXuSY,961
|
146
|
-
airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=
|
148
|
+
airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=uK5n1oImmFkJJCRukvNNSxCwRcXPV0BAkeOmr5ep6LY,11531
|
147
149
|
airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=hDti8DlF_R5YYX95hg9BPogYtG-KUYtOifrFDv_L3Hk,2456
|
148
150
|
airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
149
151
|
airbyte_cdk/sources/streams/utils/stream_helper.py,sha256=8n1e27DqELN_KRXuWW1IE3ZjE9zvhclNqsKtOosI_Ds,1480
|
@@ -156,6 +158,7 @@ airbyte_cdk/sources/utils/schema_models.py,sha256=m1vOqNkkVYGblc492wKo11Zm5FK9F0
|
|
156
158
|
airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyuYF875bSQg,9493
|
157
159
|
airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
|
158
160
|
airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
|
161
|
+
airbyte_cdk/utils/datetime_format_inferrer.py,sha256=1z5lGq_DI9LFrT68ftlJSqndS6i-Rs1PX7T_RBtOJpA,3443
|
159
162
|
airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
|
160
163
|
airbyte_cdk/utils/schema_inferrer.py,sha256=j0us_mEMj8PVVzSZfoS1adK7V7a--mSHQozo6xmsiIc,3720
|
161
164
|
airbyte_cdk/utils/stream_status_utils.py,sha256=X1Vy7BhglycjdIWpfKDfwJussNCxYffelKt6Utjx-qY,1005
|
@@ -163,8 +166,8 @@ airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnT
|
|
163
166
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
164
167
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
165
168
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
166
|
-
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
|
167
|
-
unit_tests/connector_builder/test_message_grouper.py,sha256=
|
169
|
+
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=0TQn6C_De9mpRMU6lcrcuKWIwAHKw2GVMG8iT6OTBMo,28489
|
170
|
+
unit_tests/connector_builder/test_message_grouper.py,sha256=MSj9bQd4MtGsmXP-wPHiq4nODbLyrNT-W2CVpNOs2tE,28116
|
168
171
|
unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
|
169
172
|
unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
170
173
|
unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
|
@@ -172,7 +175,7 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
172
175
|
unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
|
173
176
|
unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
|
174
177
|
unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
175
|
-
unit_tests/sources/test_abstract_source.py,sha256=
|
178
|
+
unit_tests/sources/test_abstract_source.py,sha256=Gn5XJKQlJhxxqawS5T-81BUXmPNUvg3g2UK-kXq-v48,46351
|
176
179
|
unit_tests/sources/test_config.py,sha256=gFXqU_6OjwHXkV4JHMqQUznxmvTWN8nAv0w0-FFpugc,2477
|
177
180
|
unit_tests/sources/test_connector_state_manager.py,sha256=ynFxA63Cxe6t-wMMh9C6ByTlMAuk8W7H2FikDhnUEQ0,24264
|
178
181
|
unit_tests/sources/test_source.py,sha256=eVtU9Zuc9gBsg11Pb5xjDtyU0gVrbYqbZ4RmzPvDw_M,24695
|
@@ -183,7 +186,7 @@ unit_tests/sources/declarative/test_declarative_stream.py,sha256=3leJnZIYHiFq8XI
|
|
183
186
|
unit_tests/sources/declarative/test_manifest_declarative_source.py,sha256=GckUc3nepzZkD1UM24woHlYCVZb5DP4IAQC3IeMyZF0,58924
|
184
187
|
unit_tests/sources/declarative/test_yaml_declarative_source.py,sha256=6HhsUFgB7ueN0yOUHWb4gpPYLng5jasxN_plvz3x37g,5097
|
185
188
|
unit_tests/sources/declarative/auth/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
186
|
-
unit_tests/sources/declarative/auth/test_oauth.py,sha256=
|
189
|
+
unit_tests/sources/declarative/auth/test_oauth.py,sha256=WOGs28NVOvb0lIy1ymtQgUEbI8r1Z2fBIY6iWBqCnoE,8514
|
187
190
|
unit_tests/sources/declarative/auth/test_session_token_auth.py,sha256=mxWCm_0AyVI6J1Q5CjogXY-EkXFfWkMZjNtBeb0bOow,6135
|
188
191
|
unit_tests/sources/declarative/auth/test_token_auth.py,sha256=EIaxGFvaUE6vAUW2_tBrds6nTx4qhfYK8ppRwoNXKd0,6162
|
189
192
|
unit_tests/sources/declarative/checks/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
@@ -247,6 +250,8 @@ unit_tests/sources/declarative/schema/source_test/__init__.py,sha256=4Hw-PX1-VgE
|
|
247
250
|
unit_tests/sources/declarative/states/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
248
251
|
unit_tests/sources/declarative/stream_slicers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
249
252
|
unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py,sha256=MI1kLtMuC1LKryBzub0KconsrpIVgPOhAtYM4b3qRfA,9507
|
253
|
+
unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
254
|
+
unit_tests/sources/message/test_repository.py,sha256=qgCFpRUZU_Mm2JePtyIX5KheFYXCDj1ODTlo8z-Yz4Y,2234
|
250
255
|
unit_tests/sources/streams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
251
256
|
unit_tests/sources/streams/test_availability_strategy.py,sha256=vJrSEk9NwRghu0YsSNoMYHKWzA9UFemwyClpke8Mk2s,2315
|
252
257
|
unit_tests/sources/streams/test_streams_core.py,sha256=YOC7XqWFJ13Z4YuO9Nh4AR4AwpJ-s111vqPplFfpxk4,5059
|
@@ -256,14 +261,15 @@ unit_tests/sources/streams/http/test_http.py,sha256=H0lGcb0XHuM1R7GC3wAaaxhGoNwi
|
|
256
261
|
unit_tests/sources/streams/http/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
257
262
|
unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4E6ekXyJEIEfJxA0jlDvc,6546
|
258
263
|
unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
259
|
-
unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=
|
264
|
+
unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=NoTfDSClXFqjbN_zvoleVWO0lDhjR4obWYn5ApQkWnI,14166
|
260
265
|
unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
266
|
+
unit_tests/utils/test_datetime_format_inferrer.py,sha256=Io2o5flTre9gyI_IDDMpzxOjCz3sr16LO0GRqOD59uk,2946
|
261
267
|
unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
|
262
268
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
263
269
|
unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
|
264
270
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
265
|
-
airbyte_cdk-0.40.
|
266
|
-
airbyte_cdk-0.40.
|
267
|
-
airbyte_cdk-0.40.
|
268
|
-
airbyte_cdk-0.40.
|
269
|
-
airbyte_cdk-0.40.
|
271
|
+
airbyte_cdk-0.40.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
272
|
+
airbyte_cdk-0.40.4.dist-info/METADATA,sha256=cg5ce7pYJVInYpcsugnD0J80AjRXnzzaKY11MEHKBeg,8902
|
273
|
+
airbyte_cdk-0.40.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
274
|
+
airbyte_cdk-0.40.4.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
275
|
+
airbyte_cdk-0.40.4.dist-info/RECORD,,
|
@@ -176,9 +176,9 @@ def invalid_config_file(tmp_path):
|
|
176
176
|
|
177
177
|
|
178
178
|
def test_handle_resolve_manifest(valid_resolve_manifest_config_file, dummy_catalog):
|
179
|
-
with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as
|
179
|
+
with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patched_handle:
|
180
180
|
handle_request(["read", "--config", str(valid_resolve_manifest_config_file), "--catalog", str(dummy_catalog)])
|
181
|
-
assert
|
181
|
+
assert patched_handle.call_count == 1
|
182
182
|
|
183
183
|
|
184
184
|
def test_handle_test_read(valid_read_config_file, configured_catalog):
|
@@ -354,6 +354,7 @@ def test_read():
|
|
354
354
|
],
|
355
355
|
test_read_limit_reached=False,
|
356
356
|
inferred_schema=None,
|
357
|
+
inferred_datetime_formats=None,
|
357
358
|
latest_config_update={}
|
358
359
|
)
|
359
360
|
|
@@ -368,6 +369,7 @@ def test_read():
|
|
368
369
|
],
|
369
370
|
"test_read_limit_reached": False,
|
370
371
|
"inferred_schema": None,
|
372
|
+
"inferred_datetime_formats": None,
|
371
373
|
"latest_config_update": {}
|
372
374
|
},
|
373
375
|
emitted_at=1,
|
@@ -382,6 +384,37 @@ def test_read():
|
|
382
384
|
assert output_record == expected_airbyte_message
|
383
385
|
|
384
386
|
|
387
|
+
def test_config_update():
|
388
|
+
manifest = copy.deepcopy(MANIFEST)
|
389
|
+
manifest["definitions"]["retriever"]["requester"]["authenticator"] = {
|
390
|
+
"type": "OAuthAuthenticator",
|
391
|
+
"token_refresh_endpoint": "https://oauth.endpoint.com/tokens/bearer",
|
392
|
+
"client_id": "{{ config['credentials']['client_id'] }}",
|
393
|
+
"client_secret": "{{ config['credentials']['client_secret'] }}",
|
394
|
+
"refresh_token": "{{ config['credentials']['refresh_token'] }}",
|
395
|
+
"refresh_token_updater": {}
|
396
|
+
}
|
397
|
+
config = copy.deepcopy(TEST_READ_CONFIG)
|
398
|
+
config["__injected_declarative_manifest"] = manifest
|
399
|
+
config["credentials"] = {
|
400
|
+
"client_id": "a client id",
|
401
|
+
"client_secret": "a client secret",
|
402
|
+
"refresh_token": "a refresh token",
|
403
|
+
}
|
404
|
+
source = ManifestDeclarativeSource(manifest)
|
405
|
+
|
406
|
+
refresh_request_response = {
|
407
|
+
"access_token": "an updated access token",
|
408
|
+
"refresh_token": "an updated refresh token",
|
409
|
+
"expires_in": 3600,
|
410
|
+
}
|
411
|
+
with patch("airbyte_cdk.sources.streams.http.requests_native_auth.SingleUseRefreshTokenOauth2Authenticator._get_refresh_access_token_response", return_value=refresh_request_response):
|
412
|
+
output = handle_connector_builder_request(
|
413
|
+
source, "test_read", config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG), TestReadLimits()
|
414
|
+
)
|
415
|
+
assert output.record.data["latest_config_update"]
|
416
|
+
|
417
|
+
|
385
418
|
@patch("traceback.TracebackException.from_exception")
|
386
419
|
def test_read_returns_error_response(mock_from_exception):
|
387
420
|
class MockManifestDeclarativeSource:
|
@@ -410,7 +443,8 @@ def test_read_returns_error_response(mock_from_exception):
|
|
410
443
|
slice_descriptor=None, state=None)],
|
411
444
|
test_read_limit_reached=False,
|
412
445
|
inferred_schema=None,
|
413
|
-
|
446
|
+
inferred_datetime_formats={},
|
447
|
+
latest_config_update=None)
|
414
448
|
|
415
449
|
expected_message = AirbyteMessage(
|
416
450
|
type=MessageType.RECORD,
|
@@ -94,7 +94,8 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
94
94
|
"body": {"custom": "field"},
|
95
95
|
}
|
96
96
|
response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"}
|
97
|
-
expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"}
|
97
|
+
expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}, "date": {"type": "string"}}, "type": "object"}
|
98
|
+
expected_datetime_fields = {"date":"%Y-%m-%d"}
|
98
99
|
expected_pages = [
|
99
100
|
StreamReadPages(
|
100
101
|
request=HttpRequest(
|
@@ -105,7 +106,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
105
106
|
http_method="GET",
|
106
107
|
),
|
107
108
|
response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
|
108
|
-
records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}],
|
109
|
+
records=[{"name": "Shinobu Kocho", "date": "2023-03-03"}, {"name": "Muichiro Tokito", "date": "2023-03-04"}],
|
109
110
|
),
|
110
111
|
StreamReadPages(
|
111
112
|
request=HttpRequest(
|
@@ -116,7 +117,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
116
117
|
http_method="GET",
|
117
118
|
),
|
118
119
|
response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
|
119
|
-
records=[{"name": "Mitsuri Kanroji"}],
|
120
|
+
records=[{"name": "Mitsuri Kanroji", "date": "2023-03-05"}],
|
120
121
|
),
|
121
122
|
]
|
122
123
|
|
@@ -124,11 +125,11 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
124
125
|
[
|
125
126
|
request_log_message(request),
|
126
127
|
response_log_message(response),
|
127
|
-
record_message("hashiras", {"name": "Shinobu Kocho"}),
|
128
|
-
record_message("hashiras", {"name": "Muichiro Tokito"}),
|
128
|
+
record_message("hashiras", {"name": "Shinobu Kocho", "date": "2023-03-03"}),
|
129
|
+
record_message("hashiras", {"name": "Muichiro Tokito", "date": "2023-03-04"}),
|
129
130
|
request_log_message(request),
|
130
131
|
response_log_message(response),
|
131
|
-
record_message("hashiras", {"name": "Mitsuri Kanroji"}),
|
132
|
+
record_message("hashiras", {"name": "Mitsuri Kanroji", "date": "2023-03-05"}),
|
132
133
|
]
|
133
134
|
))
|
134
135
|
|
@@ -138,6 +139,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
|
|
138
139
|
)
|
139
140
|
|
140
141
|
assert actual_response.inferred_schema == expected_schema
|
142
|
+
assert actual_response.inferred_datetime_formats == expected_datetime_fields
|
141
143
|
|
142
144
|
single_slice = actual_response.slices[0]
|
143
145
|
for i, actual_page in enumerate(single_slice.pages):
|
@@ -552,18 +554,6 @@ def test_given_control_message_then_stream_read_has_config_update(mock_entrypoin
|
|
552
554
|
assert stream_read.latest_config_update == updated_config
|
553
555
|
|
554
556
|
|
555
|
-
@patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
|
556
|
-
def test_given_no_control_message_then_use_in_memory_config_change_as_update(mock_entrypoint_read):
|
557
|
-
mock_source = make_mock_source(mock_entrypoint_read, iter(any_request_and_response_with_a_record()))
|
558
|
-
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
559
|
-
full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
|
560
|
-
stream_read: StreamRead = connector_builder_handler.get_message_groups(
|
561
|
-
source=mock_source, config=full_config, configured_catalog=create_configured_catalog("hashiras")
|
562
|
-
)
|
563
|
-
|
564
|
-
assert stream_read.latest_config_update == CONFIG
|
565
|
-
|
566
|
-
|
567
557
|
@patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
|
568
558
|
def test_given_multiple_control_messages_then_stream_read_has_latest_based_on_emitted_at(mock_entrypoint_read):
|
569
559
|
earliest = 0
|
@@ -203,7 +203,7 @@ class TestOauth2Authenticator:
|
|
203
203
|
assert oauth.get_token_expiry_date() == pendulum.parse(next_day)
|
204
204
|
|
205
205
|
|
206
|
-
def mock_request(method, url, data):
|
207
|
-
if url == "refresh_end":
|
206
|
+
def mock_request(method, url, data, headers):
|
207
|
+
if url == "refresh_end" and headers == {"Content-Type": "application/json"}:
|
208
208
|
return resp
|
209
|
-
raise Exception(f"Error while refreshing access token with request: {method}, {url}, {data}")
|
209
|
+
raise Exception(f"Error while refreshing access token with request: {method}, {url}, {data}, {headers}")
|
File without changes
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from airbyte_cdk.models import (
|
7
|
+
AirbyteControlConnectorConfigMessage,
|
8
|
+
AirbyteControlMessage,
|
9
|
+
AirbyteLogMessage,
|
10
|
+
AirbyteMessage,
|
11
|
+
Level,
|
12
|
+
OrchestratorType,
|
13
|
+
Type,
|
14
|
+
)
|
15
|
+
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
16
|
+
|
17
|
+
A_CONTROL = AirbyteControlMessage(
|
18
|
+
type=OrchestratorType.CONNECTOR_CONFIG,
|
19
|
+
emitted_at=0,
|
20
|
+
connectorConfig=AirbyteControlConnectorConfigMessage(config={"a config": "value"}),
|
21
|
+
)
|
22
|
+
ANOTHER_CONTROL = AirbyteControlMessage(
|
23
|
+
type=OrchestratorType.CONNECTOR_CONFIG,
|
24
|
+
emitted_at=0,
|
25
|
+
connectorConfig=AirbyteControlConnectorConfigMessage(config={"another config": "another value"}),
|
26
|
+
)
|
27
|
+
|
28
|
+
|
29
|
+
def test_given_no_messages_when_consume_queue_then_return_empty():
|
30
|
+
repo = InMemoryMessageRepository()
|
31
|
+
messages = list(repo.consume_queue())
|
32
|
+
assert messages == []
|
33
|
+
|
34
|
+
|
35
|
+
def test_given_messages_when_consume_queue_then_return_messages():
|
36
|
+
repo = InMemoryMessageRepository()
|
37
|
+
first_message = AirbyteMessage(type=Type.CONTROL, control=A_CONTROL)
|
38
|
+
repo.emit_message(first_message)
|
39
|
+
second_message = AirbyteMessage(type=Type.CONTROL, control=ANOTHER_CONTROL)
|
40
|
+
repo.emit_message(second_message)
|
41
|
+
|
42
|
+
messages = repo.consume_queue()
|
43
|
+
|
44
|
+
assert list(messages) == [first_message, second_message]
|
45
|
+
|
46
|
+
|
47
|
+
def test_given_message_is_consumed_when_consume_queue_then_remove_message_from_queue():
|
48
|
+
repo = InMemoryMessageRepository()
|
49
|
+
first_message = AirbyteMessage(type=Type.CONTROL, control=A_CONTROL)
|
50
|
+
repo.emit_message(first_message)
|
51
|
+
second_message = AirbyteMessage(type=Type.CONTROL, control=ANOTHER_CONTROL)
|
52
|
+
repo.emit_message(second_message)
|
53
|
+
|
54
|
+
message_generator = repo.consume_queue()
|
55
|
+
consumed_message = next(message_generator)
|
56
|
+
assert consumed_message == first_message
|
57
|
+
|
58
|
+
second_message_generator = repo.consume_queue()
|
59
|
+
assert list(second_message_generator) == [second_message]
|
60
|
+
|
61
|
+
|
62
|
+
def test_given_message_is_not_control_message_when_emit_message_then_raise_error():
|
63
|
+
repo = InMemoryMessageRepository()
|
64
|
+
with pytest.raises(ValueError):
|
65
|
+
repo.emit_message(AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="any log message")))
|
@@ -4,11 +4,13 @@
|
|
4
4
|
|
5
5
|
import json
|
6
6
|
import logging
|
7
|
+
from unittest.mock import Mock
|
7
8
|
|
8
9
|
import freezegun
|
9
10
|
import pendulum
|
10
11
|
import pytest
|
11
12
|
import requests
|
13
|
+
from airbyte_cdk.models import OrchestratorType, Type
|
12
14
|
from airbyte_cdk.sources.streams.http.requests_native_auth import (
|
13
15
|
BasicHttpAuthenticator,
|
14
16
|
MultipleTokenAuthenticator,
|
@@ -243,7 +245,7 @@ class TestSingleUseRefreshTokenOauth2Authenticator:
|
|
243
245
|
("date_format", "2023-04-04", "YYYY-MM-DD", "2023-04-04T00:00:00+00:00"),
|
244
246
|
]
|
245
247
|
)
|
246
|
-
def
|
248
|
+
def test_given_no_message_repository_get_access_token(self, test_name, expires_in_value, expiry_date_format, expected_expiry_date, capsys, mocker, connector_config):
|
247
249
|
authenticator = SingleUseRefreshTokenOauth2Authenticator(
|
248
250
|
connector_config,
|
249
251
|
token_refresh_endpoint="foobar",
|
@@ -270,6 +272,30 @@ class TestSingleUseRefreshTokenOauth2Authenticator:
|
|
270
272
|
assert not captured.out
|
271
273
|
assert authenticator.access_token == access_token == "new_access_token"
|
272
274
|
|
275
|
+
def test_given_message_repository_when_get_access_token_emit_message(self, mocker, connector_config):
|
276
|
+
message_repository = Mock()
|
277
|
+
authenticator = SingleUseRefreshTokenOauth2Authenticator(
|
278
|
+
connector_config,
|
279
|
+
token_refresh_endpoint="foobar",
|
280
|
+
client_id=connector_config["credentials"]["client_id"],
|
281
|
+
client_secret=connector_config["credentials"]["client_secret"],
|
282
|
+
token_expiry_date_format="YYYY-MM-DD",
|
283
|
+
message_repository=message_repository,
|
284
|
+
)
|
285
|
+
authenticator.refresh_access_token = mocker.Mock(return_value=("new_access_token", "2023-04-04", "new_refresh_token"))
|
286
|
+
authenticator.token_has_expired = mocker.Mock(return_value=True)
|
287
|
+
|
288
|
+
authenticator.get_access_token()
|
289
|
+
|
290
|
+
emitted_message = message_repository.emit_message.call_args_list[0].args[0]
|
291
|
+
assert emitted_message.type == Type.CONTROL
|
292
|
+
assert emitted_message.control.type == OrchestratorType.CONNECTOR_CONFIG
|
293
|
+
assert emitted_message.control.connectorConfig.config["credentials"]["access_token"] == "new_access_token"
|
294
|
+
assert emitted_message.control.connectorConfig.config["credentials"]["refresh_token"] == "new_refresh_token"
|
295
|
+
assert emitted_message.control.connectorConfig.config["credentials"]["token_expiry_date"] == "2023-04-04T00:00:00+00:00"
|
296
|
+
assert emitted_message.control.connectorConfig.config["credentials"]["client_id"] == "my_client_id"
|
297
|
+
assert emitted_message.control.connectorConfig.config["credentials"]["client_secret"] == "my_client_secret"
|
298
|
+
|
273
299
|
def test_refresh_access_token(self, mocker, connector_config):
|
274
300
|
authenticator = SingleUseRefreshTokenOauth2Authenticator(
|
275
301
|
connector_config,
|
@@ -288,7 +314,7 @@ class TestSingleUseRefreshTokenOauth2Authenticator:
|
|
288
314
|
assert authenticator.refresh_access_token() == ("new_access_token", "42", "new_refresh_token")
|
289
315
|
|
290
316
|
|
291
|
-
def mock_request(method, url, data):
|
292
|
-
if url == "refresh_end":
|
317
|
+
def mock_request(method, url, data, headers):
|
318
|
+
if url == "refresh_end" and headers == {"Content-Type": "application/json"}:
|
293
319
|
return resp
|
294
|
-
raise Exception(f"Error while refreshing access token with request: {method}, {url}, {data}")
|
320
|
+
raise Exception(f"Error while refreshing access token with request: {method}, {url}, {data}, {headers}")
|
@@ -7,7 +7,7 @@ import datetime
|
|
7
7
|
import logging
|
8
8
|
from collections import defaultdict
|
9
9
|
from typing import Any, Callable, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
|
10
|
-
from unittest.mock import call
|
10
|
+
from unittest.mock import Mock, call
|
11
11
|
|
12
12
|
import pytest
|
13
13
|
from airbyte_cdk.models import (
|
@@ -37,9 +37,11 @@ from airbyte_cdk.models import Type
|
|
37
37
|
from airbyte_cdk.models import Type as MessageType
|
38
38
|
from airbyte_cdk.sources import AbstractSource
|
39
39
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
40
|
+
from airbyte_cdk.sources.message import MessageRepository
|
40
41
|
from airbyte_cdk.sources.streams import IncrementalMixin, Stream
|
41
42
|
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
42
43
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
44
|
+
from pytest import fixture
|
43
45
|
|
44
46
|
logger = logging.getLogger("airbyte")
|
45
47
|
|
@@ -50,10 +52,12 @@ class MockSource(AbstractSource):
|
|
50
52
|
check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None,
|
51
53
|
streams: List[Stream] = None,
|
52
54
|
per_stream: bool = True,
|
55
|
+
message_repository: MessageRepository = None
|
53
56
|
):
|
54
57
|
self._streams = streams
|
55
58
|
self.check_lambda = check_lambda
|
56
59
|
self.per_stream = per_stream
|
60
|
+
self._message_repository = message_repository
|
57
61
|
|
58
62
|
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
|
59
63
|
if self.check_lambda:
|
@@ -69,6 +73,10 @@ class MockSource(AbstractSource):
|
|
69
73
|
def per_stream_state_enabled(self) -> bool:
|
70
74
|
return self.per_stream
|
71
75
|
|
76
|
+
@property
def message_repository(self) -> Optional[MessageRepository]:
    """Return the message repository passed to the constructor (``None`` when none was given)."""
    return self._message_repository
|
79
|
+
|
72
80
|
|
73
81
|
class StreamNoStateMethod(Stream):
|
74
82
|
name = "managers"
|
@@ -97,6 +105,16 @@ class MockStreamOverridesStateMethod(Stream, IncrementalMixin):
|
|
97
105
|
self._cursor_value = value.get(self.cursor_field, self.start_date)
|
98
106
|
|
99
107
|
|
108
|
+
# Sentinel message handed back by the mocked repository's consume_queue; tests look for
# this exact object in the output of the source's read().
MESSAGE_FROM_REPOSITORY = Mock()
|
109
|
+
|
110
|
+
|
111
|
+
@fixture
def message_repository():
    """Provide a ``MessageRepository`` mock whose ``consume_queue`` returns ``[MESSAGE_FROM_REPOSITORY]``."""
    repository = Mock(spec=MessageRepository)
    repository.consume_queue.return_value = [MESSAGE_FROM_REPOSITORY]
    return repository
|
116
|
+
|
117
|
+
|
100
118
|
def test_successful_check():
|
101
119
|
"""Tests that if a source returns TRUE for the connection check the appropriate connectionStatus success message is returned"""
|
102
120
|
expected = AirbyteConnectionStatus(status=Status.SUCCEEDED)
|
@@ -221,6 +239,34 @@ def test_read_nonexistent_stream_raises_exception(mocker):
|
|
221
239
|
list(src.read(logger, {}, catalog))
|
222
240
|
|
223
241
|
|
242
|
+
def test_read_stream_emits_repository_message_before_record(mocker, message_repository):
    """Check that the queued repository message is emitted exactly once and ahead of every record."""
    stream = MockStream(name="my_stream")
    mocker.patch.object(MockStream, "get_json_schema", return_value={})
    mocker.patch.object(MockStream, "read_records", side_effect=[[{"a record": "a value"}, {"another record": "another value"}]])
    # The first drain of the queue yields the repository message, the second one is empty.
    message_repository.consume_queue.side_effect = [[MESSAGE_FROM_REPOSITORY], []]

    source = MockSource(streams=[stream], message_repository=message_repository)

    messages = list(source.read(logger, {}, ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])))

    assert messages.count(MESSAGE_FROM_REPOSITORY) == 1
    repository_message_index = messages.index(MESSAGE_FROM_REPOSITORY)
    record_indexes = [index for index, message in enumerate(messages) if message.type == Type.RECORD]
    # Guard against a vacuous pass: all() over an empty sequence is trivially True.
    assert record_indexes
    assert all(repository_message_index < record_index for record_index in record_indexes)
|
255
|
+
|
256
|
+
|
257
|
+
def test_read_stream_emits_repository_message_on_error(mocker, message_repository):
    """Check that the queued repository message is emitted even though reading the stream raises."""
    stream = MockStream(name="my_stream")
    mocker.patch.object(MockStream, "get_json_schema", return_value={})
    mocker.patch.object(MockStream, "read_records", side_effect=RuntimeError("error"))
    message_repository.consume_queue.return_value = [MESSAGE_FROM_REPOSITORY]

    source = MockSource(streams=[stream], message_repository=message_repository)

    # Collect messages one at a time: `messages = list(source.read(...))` raises before the
    # name is bound, so an assertion on it would either never run (inside the `with` block)
    # or fail with a NameError (after it).
    messages = []
    with pytest.raises(RuntimeError):
        for message in source.read(logger, {}, ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])):
            messages.append(message)

    assert MESSAGE_FROM_REPOSITORY in messages
|
268
|
+
|
269
|
+
|
224
270
|
def test_read_stream_with_error_gets_display_message(mocker):
|
225
271
|
stream = MockStream(name="my_stream")
|
226
272
|
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Dict, List
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage
|
9
|
+
from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
|
10
|
+
|
11
|
+
# Fixed emitted_at value passed to every AirbyteRecordMessage built by the test below.
NOW = 1234567
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.mark.parametrize(
    "test_name,input_records,expected_candidate_fields",
    [
        ("empty", [], {}),
        ("simple_match", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("timestamp_match_integer", [{"d": 1686058051}], {"d": "%s"}),
        ("timestamp_match_string", [{"d": "1686058051"}], {"d": "%s"}),
        ("timestamp_no_match_integer", [{"d": 99}], {}),
        ("timestamp_no_match_string", [{"d": "99999999999999999999"}], {}),
        ("simple_no_match", [{"d": "20220203"}], {}),
        ("multiple_match", [{"d": "2022-02-03", "e": "2022-02-03"}], {"d": "%Y-%m-%d", "e": "%Y-%m-%d"}),
        (
            "multiple_no_match",
            [{"d": "20220203", "r": "ccc", "e": {"something-else": "2023-03-03"}, "s": ["2023-03-03"], "x": False, "y": 123}],
            {},
        ),
        ("format_1", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("format_2", [{"d": "2022-02-03 12:34:56"}], {"d": "%Y-%m-%d %H:%M:%S"}),
        ("format_3", [{"d": "2022-02-03 12:34:56.123456+00:00"}], {"d": "%Y-%m-%d %H:%M:%S.%f+00:00"}),
        ("format_4", [{"d": "2022-02-03T12:34:56.123456+0000"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_4 milliseconds Z", [{"d": "2022-02-03T12:34:56.000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_4 microseconds Z", [{"d": "2022-02-03T12:34:56.000000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_6", [{"d": "03/02/2022 12:34"}], {"d": "%d/%m/%Y %H:%M"}),
        ("format_7", [{"d": "2022-02"}], {"d": "%Y-%m"}),
        ("format_8", [{"d": "03-02-2022"}], {"d": "%d-%m-%Y"}),
        ("limit_down", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "2022-02-03", "x": "another thing"}], {"d": "%Y-%m-%d"}),
        ("limit_down all", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "also another thing", "x": "another thing"}], {}),
        ("limit_down empty", [{"d": "2022-02-03", "x": "2022-02-03"}, {}], {}),
        ("limit_down unsupported type", [{"d": "2022-02-03"}, {"d": False}], {}),
        ("limit_down complex type", [{"d": "2022-02-03"}, {"d": {"date": "2022-03-03"}}], {}),
        ("limit_down different format timestamp", [{"d": "2022-02-03"}, {"d": 1686058051}], {}),
        ("limit_down different format datetime", [{"d": "2022-02-03"}, {"d": "2022-02-03T12:34:56.000000Z"}], {}),
        ("no scope expand", [{}, {"d": "2022-02-03"}], {}),
    ],
)
def test_schema_inferrer(test_name, input_records: List, expected_candidate_fields: Dict[str, str]):
    """Accumulate every input record into a fresh DatetimeFormatInferrer and compare the inferred formats.

    ``test_name`` is only a human-readable label for the case; each label is unique so a
    failing case can be identified unambiguously.
    """
    inferrer = DatetimeFormatInferrer()
    for record in input_records:
        inferrer.accumulate(AirbyteRecordMessage(stream="abc", data=record, emitted_at=NOW))
    assert inferrer.get_inferred_datetime_formats() == expected_candidate_fields
|
File without changes
|
File without changes
|
File without changes
|