airbyte-cdk 0.40.2__py3-none-any.whl → 0.40.4__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27) hide show
  1. airbyte_cdk/config_observation.py +10 -2
  2. airbyte_cdk/connector_builder/message_grouper.py +7 -2
  3. airbyte_cdk/connector_builder/models.py +1 -0
  4. airbyte_cdk/entrypoint.py +34 -19
  5. airbyte_cdk/sources/abstract_source.py +13 -0
  6. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +61 -3
  7. airbyte_cdk/sources/declarative/manifest_declarative_source.py +6 -0
  8. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +4 -4
  9. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +7 -2
  10. airbyte_cdk/sources/message/__init__.py +7 -0
  11. airbyte_cdk/sources/message/repository.py +36 -0
  12. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +6 -1
  13. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +9 -2
  14. airbyte_cdk/utils/datetime_format_inferrer.py +80 -0
  15. {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/METADATA +1 -1
  16. {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/RECORD +27 -21
  17. unit_tests/connector_builder/test_connector_builder_handler.py +37 -3
  18. unit_tests/connector_builder/test_message_grouper.py +8 -18
  19. unit_tests/sources/declarative/auth/test_oauth.py +3 -3
  20. unit_tests/sources/message/__init__.py +0 -0
  21. unit_tests/sources/message/test_repository.py +65 -0
  22. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +30 -4
  23. unit_tests/sources/test_abstract_source.py +47 -1
  24. unit_tests/utils/test_datetime_format_inferrer.py +53 -0
  25. {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/LICENSE.txt +0 -0
  26. {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/WHEEL +0 -0
  27. {airbyte_cdk-0.40.2.dist-info → airbyte_cdk-0.40.4.dist-info}/top_level.txt +0 -0
@@ -68,10 +68,18 @@ def observe_connector_config(non_observed_connector_config: MutableMapping[str,
68
68
 
69
69
 
70
70
  def emit_configuration_as_airbyte_control_message(config: MutableMapping):
71
+ """
72
+ WARNING: deprecated - emit_configuration_as_airbyte_control_message is being deprecated in favor of the MessageRepository mechanism.
73
+ See the airbyte_cdk.sources.message package
74
+ """
75
+ airbyte_message = create_connector_config_control_message(config)
76
+ print(airbyte_message.json(exclude_unset=True))
77
+
78
+
79
+ def create_connector_config_control_message(config):
71
80
  control_message = AirbyteControlMessage(
72
81
  type=OrchestratorType.CONNECTOR_CONFIG,
73
82
  emitted_at=time.time() * 1000,
74
83
  connectorConfig=AirbyteControlConnectorConfigMessage(config=config),
75
84
  )
76
- airbyte_message = AirbyteMessage(type=Type.CONTROL, control=control_message)
77
- print(airbyte_message.json(exclude_unset=True))
85
+ return AirbyteMessage(type=Type.CONTROL, control=control_message)
@@ -14,6 +14,7 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint
14
14
  from airbyte_cdk.sources import AbstractSource
15
15
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
16
16
  from airbyte_cdk.utils import AirbyteTracedException
17
+ from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
17
18
  from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
18
19
  from airbyte_protocol.models.airbyte_protocol import (
19
20
  AirbyteControlMessage,
@@ -46,6 +47,7 @@ class MessageGrouper:
46
47
  if record_limit is not None and not (1 <= record_limit <= 1000):
47
48
  raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")
48
49
  schema_inferrer = SchemaInferrer()
50
+ datetime_format_inferrer = DatetimeFormatInferrer()
49
51
 
50
52
  if record_limit is None:
51
53
  record_limit = self._max_record_limit
@@ -58,6 +60,7 @@ class MessageGrouper:
58
60
  for message_group in self._get_message_groups(
59
61
  self._read_stream(source, config, configured_catalog),
60
62
  schema_inferrer,
63
+ datetime_format_inferrer,
61
64
  record_limit,
62
65
  ):
63
66
  if isinstance(message_group, AirbyteLogMessage):
@@ -79,11 +82,12 @@ class MessageGrouper:
79
82
  inferred_schema=schema_inferrer.get_stream_schema(
80
83
  configured_catalog.streams[0].stream.name
81
84
  ), # The connector builder currently only supports reading from a single stream at a time
82
- latest_config_update=latest_config_update.connectorConfig.config if latest_config_update else self._clean_config(config),
85
+ latest_config_update=self._clean_config(latest_config_update.connectorConfig.config) if latest_config_update else None,
86
+ inferred_datetime_formats=datetime_format_inferrer.get_inferred_datetime_formats(),
83
87
  )
84
88
 
85
89
  def _get_message_groups(
86
- self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
90
+ self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, datetime_format_inferrer: DatetimeFormatInferrer, limit: int
87
91
  ) -> Iterable[Union[StreamReadPages, AirbyteControlMessage, AirbyteLogMessage, AirbyteTraceMessage]]:
88
92
  """
89
93
  Message groups are partitioned according to when request log messages are received. Subsequent response log messages
@@ -141,6 +145,7 @@ class MessageGrouper:
141
145
  current_page_records.append(message.record.data)
142
146
  records_count += 1
143
147
  schema_inferrer.accumulate(message.record)
148
+ datetime_format_inferrer.accumulate(message.record)
144
149
  elif message.type == MessageType.CONTROL and message.control.type == OrchestratorType.CONNECTOR_CONFIG:
145
150
  yield message.control
146
151
  else:
@@ -48,6 +48,7 @@ class StreamRead(object):
48
48
  slices: List[StreamReadSlices]
49
49
  test_read_limit_reached: bool
50
50
  inferred_schema: Optional[Dict[str, Any]]
51
+ inferred_datetime_formats: Optional[Dict[str, str]]
51
52
  latest_config_update: Optional[Dict[str, Any]]
52
53
 
53
54
 
airbyte_cdk/entrypoint.py CHANGED
@@ -77,27 +77,32 @@ class AirbyteEntrypoint(object):
77
77
  else:
78
78
  self.logger.setLevel(logging.INFO)
79
79
 
80
- # todo: add try catch for exceptions with different exit codes
81
80
  source_spec: ConnectorSpecification = self.source.spec(self.logger)
82
- with tempfile.TemporaryDirectory() as temp_dir:
83
- if cmd == "spec":
84
- message = AirbyteMessage(type=Type.SPEC, spec=source_spec)
85
- yield message.json(exclude_unset=True)
86
- else:
87
- raw_config = self.source.read_config(parsed_args.config)
88
- config = self.source.configure(raw_config, temp_dir)
89
-
90
- if cmd == "check":
91
- yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
92
- elif cmd == "discover":
93
- yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.discover(source_spec, config))
94
- elif cmd == "read":
95
- config_catalog = self.source.read_catalog(parsed_args.catalog)
96
- state = self.source.read_state(parsed_args.state)
97
-
98
- yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.read(source_spec, config, config_catalog, state))
81
+ try:
82
+ with tempfile.TemporaryDirectory() as temp_dir:
83
+ if cmd == "spec":
84
+ message = AirbyteMessage(type=Type.SPEC, spec=source_spec)
85
+ yield from [
86
+ self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)
87
+ ]
88
+ yield self.airbyte_message_to_string(message)
99
89
  else:
100
- raise Exception("Unexpected command " + cmd)
90
+ raw_config = self.source.read_config(parsed_args.config)
91
+ config = self.source.configure(raw_config, temp_dir)
92
+
93
+ if cmd == "check":
94
+ yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
95
+ elif cmd == "discover":
96
+ yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.discover(source_spec, config))
97
+ elif cmd == "read":
98
+ config_catalog = self.source.read_catalog(parsed_args.catalog)
99
+ state = self.source.read_state(parsed_args.state)
100
+
101
+ yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.read(source_spec, config, config_catalog, state))
102
+ else:
103
+ raise Exception("Unexpected command " + cmd)
104
+ finally:
105
+ yield from [self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)]
101
106
 
102
107
  def check(self, source_spec: ConnectorSpecification, config: TConfig) -> Iterable[AirbyteMessage]:
103
108
  self.set_up_secret_filter(config, source_spec.connectionSpecification)
@@ -106,6 +111,7 @@ class AirbyteEntrypoint(object):
106
111
  except AirbyteTracedException as traced_exc:
107
112
  connection_status = traced_exc.as_connection_status_message()
108
113
  if connection_status:
114
+ yield from self._emit_queued_messages(self.source)
109
115
  yield connection_status
110
116
  return
111
117
 
@@ -115,6 +121,7 @@ class AirbyteEntrypoint(object):
115
121
  else:
116
122
  self.logger.error("Check failed")
117
123
 
124
+ yield from self._emit_queued_messages(self.source)
118
125
  yield AirbyteMessage(type=Type.CONNECTION_STATUS, connectionStatus=check_result)
119
126
 
120
127
  def discover(self, source_spec: ConnectorSpecification, config: TConfig) -> Iterable[AirbyteMessage]:
@@ -122,6 +129,8 @@ class AirbyteEntrypoint(object):
122
129
  if self.source.check_config_against_spec:
123
130
  self.validate_connection(source_spec, config)
124
131
  catalog = self.source.discover(self.logger, config)
132
+
133
+ yield from self._emit_queued_messages(self.source)
125
134
  yield AirbyteMessage(type=Type.CATALOG, catalog=catalog)
126
135
 
127
136
  def read(self, source_spec: ConnectorSpecification, config: TConfig, catalog: TCatalog, state: TState) -> Iterable[AirbyteMessage]:
@@ -130,6 +139,7 @@ class AirbyteEntrypoint(object):
130
139
  self.validate_connection(source_spec, config)
131
140
 
132
141
  yield from self.source.read(self.logger, config, catalog, state)
142
+ yield from self._emit_queued_messages(self.source)
133
143
 
134
144
  @staticmethod
135
145
  def validate_connection(source_spec: ConnectorSpecification, config: Mapping[str, Any]) -> None:
@@ -149,6 +159,11 @@ class AirbyteEntrypoint(object):
149
159
  def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
150
160
  return airbyte_message.json(exclude_unset=True)
151
161
 
162
+ def _emit_queued_messages(self, source) -> Iterable[AirbyteMessage]:
163
+ if hasattr(source, "message_repository") and source.message_repository:
164
+ yield from source.message_repository.consume_queue()
165
+ return
166
+
152
167
 
153
168
  def launch(source: Source, args: List[str]):
154
169
  source_entrypoint = AirbyteEntrypoint(source)
@@ -22,6 +22,7 @@ from airbyte_cdk.models import (
22
22
  )
23
23
  from airbyte_cdk.models import Type as MessageType
24
24
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
25
+ from airbyte_cdk.sources.message import MessageRepository
25
26
  from airbyte_cdk.sources.source import Source
26
27
  from airbyte_cdk.sources.streams import Stream
27
28
  from airbyte_cdk.sources.streams.core import StreamData
@@ -130,6 +131,7 @@ class AbstractSource(Source, ABC):
130
131
  yield stream_status_as_airbyte_message(configured_stream, AirbyteStreamStatus.INCOMPLETE)
131
132
  raise e
132
133
  except Exception as e:
134
+ yield from self._emit_queued_messages()
133
135
  logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
134
136
  logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
135
137
  yield stream_status_as_airbyte_message(configured_stream, AirbyteStreamStatus.INCOMPLETE)
@@ -198,6 +200,7 @@ class AbstractSource(Source, ABC):
198
200
  logger.info(f"Marking stream {stream_name} as RUNNING")
199
201
  # If we just read the first record of the stream, emit the transition to the RUNNING state
200
202
  yield stream_status_as_airbyte_message(configured_stream, AirbyteStreamStatus.RUNNING)
203
+ yield from self._emit_queued_messages()
201
204
  yield record
202
205
 
203
206
  logger.info(f"Read {record_counter} records from {stream_name} stream")
@@ -264,6 +267,7 @@ class AbstractSource(Source, ABC):
264
267
  record_counter = 0
265
268
  for message_counter, record_data_or_message in enumerate(records, start=1):
266
269
  message = self._get_message(record_data_or_message, stream_instance)
270
+ yield from self._emit_queued_messages()
267
271
  yield message
268
272
  if message.type == MessageType.RECORD:
269
273
  record = message.record
@@ -298,6 +302,11 @@ class AbstractSource(Source, ABC):
298
302
  """
299
303
  return logger.isEnabledFor(logging.DEBUG)
300
304
 
305
+ def _emit_queued_messages(self):
306
+ if self.message_repository:
307
+ yield from self.message_repository.consume_queue()
308
+ return
309
+
301
310
  def _read_full_refresh(
302
311
  self,
303
312
  logger: logging.Logger,
@@ -357,3 +366,7 @@ class AbstractSource(Source, ABC):
357
366
  return record_data_or_message
358
367
  else:
359
368
  return stream_data_to_airbyte_message(stream.name, record_data_or_message, stream.transformer, stream.get_json_schema())
369
+
370
+ @property
371
+ def message_repository(self) -> Union[None, MessageRepository]:
372
+ return None
@@ -580,10 +580,40 @@ definitions:
580
580
  - "{{ config['record_cursor'] }}"
581
581
  datetime_format:
582
582
  title: Cursor Field Datetime Format
583
- description: The datetime format of the Cursor Field.
583
+ description: |
584
+ The datetime format of the Cursor Field. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
585
+ * **%s**: Epoch unix timestamp - `1686218963`
586
+ * **%a**: Weekday (abbreviated) - `Sun`
587
+ * **%A**: Weekday (full) - `Sunday`
588
+ * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)
589
+ * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`
590
+ * **%b**: Month (abbreviated) - `Jan`
591
+ * **%B**: Month (full) - `January`
592
+ * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`
593
+ * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`
594
+ * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`
595
+ * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`
596
+ * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`
597
+ * **%p**: AM/PM indicator
598
+ * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
599
+ * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
600
+ * **%f**: Microsecond (zero-padded to 6 digits) - `000000`
601
+ * **%z**: UTC offset - `(empty)`, `+0000`, `-0400`
602
+ * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
603
+ * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
604
+ * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`
605
+ * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`
606
+ * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`
607
+ * **%x**: Date standard format - `08/16/1988`
608
+ * **%X**: Time standard format - `21:30:00`
609
+ * **%%**: Literal '%' character
610
+
611
+ Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).
584
612
  type: string
585
613
  examples:
586
614
  - "%Y-%m-%dT%H:%M:%S.%f%z"
615
+ - "%Y-%m-%d"
616
+ - "%s"
587
617
  cursor_granularity:
588
618
  title: Cursor Granularity
589
619
  description:
@@ -1283,11 +1313,39 @@ definitions:
1283
1313
  - "{{ config['start_time'] }}"
1284
1314
  datetime_format:
1285
1315
  title: Datetime Format
1286
- description: Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use %s if the datetime value is in epoch time (Unix timestamp).
1316
+ description: |
1317
+ Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
1318
+ * **%s**: Epoch unix timestamp - `1686218963`
1319
+ * **%a**: Weekday (abbreviated) - `Sun`
1320
+ * **%A**: Weekday (full) - `Sunday`
1321
+ * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)
1322
+ * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`
1323
+ * **%b**: Month (abbreviated) - `Jan`
1324
+ * **%B**: Month (full) - `January`
1325
+ * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`
1326
+ * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`
1327
+ * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`
1328
+ * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`
1329
+ * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`
1330
+ * **%p**: AM/PM indicator
1331
+ * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`
1332
+ * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`
1333
+ * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`
1334
+ * **%z**: UTC offset - `(empty)`, `+0000`, `-0400`, `+1030`, `+063415`, `-030712.345216`
1335
+ * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`
1336
+ * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`
1337
+ * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`
1338
+ * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`
1339
+ * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`
1340
+ * **%x**: Date representation - `08/16/1988`
1341
+ * **%X**: Time representation - `21:30:00`
1342
+ * **%%**: Literal '%' character
1343
+
1344
+ Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).
1287
1345
  type: string
1288
1346
  default: ""
1289
1347
  examples:
1290
- - "%Y-%m-%dT%H:%M:%S.%f%"
1348
+ - "%Y-%m-%dT%H:%M:%S.%f%z"
1291
1349
  - "%Y-%m-%d"
1292
1350
  - "%s"
1293
1351
  max_datetime:
@@ -26,6 +26,7 @@ from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer impo
26
26
  from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ManifestReferenceResolver
27
27
  from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory
28
28
  from airbyte_cdk.sources.declarative.types import ConnectionDefinition
29
+ from airbyte_cdk.sources.message import MessageRepository
29
30
  from airbyte_cdk.sources.streams.core import Stream
30
31
  from jsonschema.exceptions import ValidationError
31
32
  from jsonschema.validators import validate
@@ -61,6 +62,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
61
62
  self._debug = debug
62
63
  self._emit_connector_builder_messages = emit_connector_builder_messages
63
64
  self._constructor = component_factory if component_factory else ModelToComponentFactory(emit_connector_builder_messages)
65
+ self._message_repository = self._constructor.get_message_repository()
64
66
 
65
67
  self._validate_source()
66
68
 
@@ -68,6 +70,10 @@ class ManifestDeclarativeSource(DeclarativeSource):
68
70
  def resolved_manifest(self) -> Mapping[str, Any]:
69
71
  return self._source_config
70
72
 
73
+ @property
74
+ def message_repository(self) -> Union[None, MessageRepository]:
75
+ return self._message_repository
76
+
71
77
  @property
72
78
  def connection_checker(self) -> ConnectionChecker:
73
79
  check = self._source_config["check"]
@@ -453,8 +453,8 @@ class MinMaxDatetime(BaseModel):
453
453
  )
454
454
  datetime_format: Optional[str] = Field(
455
455
  "",
456
- description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use %s if the datetime value is in epoch time (Unix timestamp).',
457
- examples=["%Y-%m-%dT%H:%M:%S.%f%", "%Y-%m-%d", "%s"],
456
+ description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-0400`, `+1030`, `+063415`, `-030712.345216`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
457
+ examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
458
458
  title="Datetime Format",
459
459
  )
460
460
  max_datetime: Optional[str] = Field(
@@ -806,8 +806,8 @@ class DatetimeBasedCursor(BaseModel):
806
806
  )
807
807
  datetime_format: str = Field(
808
808
  ...,
809
- description="The datetime format of the Cursor Field.",
810
- examples=["%Y-%m-%dT%H:%M:%S.%f%z"],
809
+ description="The datetime format of the Cursor Field. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-0400`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
810
+ examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
811
811
  title="Cursor Field Datetime Format",
812
812
  )
813
813
  cursor_granularity: Optional[str] = Field(
@@ -100,6 +100,7 @@ from airbyte_cdk.sources.declarative.stream_slicers import CartesianProductStrea
100
100
  from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields
101
101
  from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
102
102
  from airbyte_cdk.sources.declarative.types import Config
103
+ from airbyte_cdk.sources.message import InMemoryMessageRepository
103
104
  from pydantic import BaseModel
104
105
 
105
106
  ComponentDefinition: Union[Literal, Mapping, List]
@@ -121,6 +122,7 @@ class ModelToComponentFactory:
121
122
  self._limit_slices_fetched = limit_slices_fetched
122
123
  self._emit_connector_builder_messages = emit_connector_builder_messages
123
124
  self._disable_retries = disable_retries
125
+ self._message_repository = InMemoryMessageRepository()
124
126
 
125
127
  def _init_mappings(self):
126
128
  self.PYDANTIC_MODEL_TO_CONSTRUCTOR: [Type[BaseModel], Callable] = {
@@ -675,8 +677,7 @@ class ModelToComponentFactory:
675
677
  def create_no_pagination(model: NoPaginationModel, config: Config, **kwargs) -> NoPagination:
676
678
  return NoPagination(parameters={})
677
679
 
678
- @staticmethod
679
- def create_oauth_authenticator(model: OAuthAuthenticatorModel, config: Config, **kwargs) -> DeclarativeOauth2Authenticator:
680
+ def create_oauth_authenticator(self, model: OAuthAuthenticatorModel, config: Config, **kwargs) -> DeclarativeOauth2Authenticator:
680
681
  if model.refresh_token_updater:
681
682
  return DeclarativeSingleUseRefreshTokenOauth2Authenticator(
682
683
  config,
@@ -693,6 +694,7 @@ class ModelToComponentFactory:
693
694
  refresh_request_body=InterpolatedMapping(model.refresh_request_body or {}, parameters=model.parameters).eval(config),
694
695
  scopes=model.scopes,
695
696
  token_expiry_date_format=model.token_expiry_date_format,
697
+ message_repository=self._message_repository,
696
698
  )
697
699
  return DeclarativeOauth2Authenticator(
698
700
  access_token_name=model.access_token_name,
@@ -845,3 +847,6 @@ class ModelToComponentFactory:
845
847
  return WaitUntilTimeFromHeaderBackoffStrategy(
846
848
  header=model.header, parameters=model.parameters, config=config, min_wait=model.min_wait, regex=model.regex
847
849
  )
850
+
851
+ def get_message_repository(self):
852
+ return self._message_repository
@@ -0,0 +1,7 @@
1
+ #
2
+ # Copyright (c) 2021 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from .repository import InMemoryMessageRepository, MessageRepository
6
+
7
+ __all__ = ["InMemoryMessageRepository", "MessageRepository"]
@@ -0,0 +1,36 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Iterable
7
+
8
+ from airbyte_cdk.models import AirbyteMessage, Type
9
+
10
+
11
+ class MessageRepository(ABC):
12
+ @abstractmethod
13
+ def emit_message(self, message: AirbyteMessage) -> None:
14
+ raise NotImplementedError()
15
+
16
+ @abstractmethod
17
+ def consume_queue(self) -> Iterable[AirbyteMessage]:
18
+ raise NotImplementedError()
19
+
20
+
21
+ class InMemoryMessageRepository(MessageRepository):
22
+ def __init__(self):
23
+ self._message_queue = []
24
+
25
+ def emit_message(self, message: AirbyteMessage) -> None:
26
+ """
27
+ :param message: As of today, only AirbyteControlMessages are supported given that supporting other types of message will need more
28
+ work and therefore this work has been postponed
29
+ """
30
+ if message.type != Type.CONTROL:
31
+ raise ValueError("As of today, only AirbyteControlMessages are supported as part of the InMemoryMessageRepository")
32
+ self._message_queue.append(message)
33
+
34
+ def consume_queue(self) -> Iterable[AirbyteMessage]:
35
+ while self._message_queue:
36
+ yield self._message_queue.pop(0)
@@ -79,7 +79,12 @@ class AbstractOauth2Authenticator(AuthBase):
79
79
  )
80
80
  def _get_refresh_access_token_response(self):
81
81
  try:
82
- response = requests.request(method="POST", url=self.get_token_refresh_endpoint(), data=self.build_refresh_request_body())
82
+ response = requests.request(
83
+ method="POST",
84
+ url=self.get_token_refresh_endpoint(),
85
+ data=self.build_refresh_request_body(),
86
+ headers={"Content-Type": "application/json"},
87
+ )
83
88
  response.raise_for_status()
84
89
  return response.json()
85
90
  except requests.exceptions.RequestException as e:
@@ -6,7 +6,8 @@ from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union
6
6
 
7
7
  import dpath
8
8
  import pendulum
9
- from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
9
+ from airbyte_cdk.config_observation import create_connector_config_control_message, emit_configuration_as_airbyte_control_message
10
+ from airbyte_cdk.sources.message import MessageRepository
10
11
  from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import AbstractOauth2Authenticator
11
12
 
12
13
 
@@ -115,6 +116,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
115
116
  refresh_token_config_path: Sequence[str] = ("credentials", "refresh_token"),
116
117
  token_expiry_date_config_path: Sequence[str] = ("credentials", "token_expiry_date"),
117
118
  token_expiry_date_format: Optional[str] = None,
119
+ message_repository: MessageRepository = None,
118
120
  ):
119
121
  """
120
122
 
@@ -144,6 +146,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
144
146
  self._token_expiry_date_format = token_expiry_date_format
145
147
  self._refresh_token_name = refresh_token_name
146
148
  self._connector_config = connector_config
149
+ self._message_repository = message_repository
147
150
  super().__init__(
148
151
  token_refresh_endpoint,
149
152
  self.get_client_id(),
@@ -211,7 +214,11 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
211
214
  self.access_token = new_access_token
212
215
  self.set_refresh_token(new_refresh_token)
213
216
  self.set_token_expiry_date(new_token_expiry_date)
214
- emit_configuration_as_airbyte_control_message(self._connector_config)
217
+ if self._message_repository:
218
+ self._message_repository.emit_message(create_connector_config_control_message(self._connector_config))
219
+ else:
220
+ # FIXME emit_configuration_as_airbyte_control_message has been deprecated in favor of package airbyte_cdk.sources.message
221
+ emit_configuration_as_airbyte_control_message(self._connector_config)
215
222
  return self.access_token
216
223
 
217
224
  def refresh_access_token(self) -> Tuple[str, str, str]:
@@ -0,0 +1,80 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from typing import Any, Dict, Union
6
+
7
+ from airbyte_cdk.models import AirbyteRecordMessage
8
+ from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
9
+
10
+
11
+ class DatetimeFormatInferrer:
12
+ """
13
+ This class is used to detect top-level fields in records that might be datetime values, along with the format used.
14
+ """
15
+
16
+ def __init__(self):
17
+ self._parser = DatetimeParser()
18
+ self._datetime_candidates: Union[None, Dict[str, str]] = None
19
+ self._formats = [
20
+ "%Y-%m-%d",
21
+ "%Y-%m-%d %H:%M:%S",
22
+ "%Y-%m-%d %H:%M:%S.%f+00:00",
23
+ "%Y-%m-%dT%H:%M:%S.%f%z",
24
+ "%s",
25
+ "%d/%m/%Y %H:%M",
26
+ "%Y-%m",
27
+ "%d-%m-%Y",
28
+ "%Y-%m-%dT%H:%M:%SZ",
29
+ ]
30
+ self._timestamp_heuristic_range = range(1_000_000_000, 2_000_000_000)
31
+
32
+ def _can_be_datetime(self, value: Any) -> bool:
33
+ """Checks if the value can be a datetime. This is the case if the value is a non-decimal string, or an integer (or decimal string) from 1_000_000_000 (inclusive) to 2_000_000_000 (exclusive). This is separate from the format check for performance reasons"""
34
+ if isinstance(value, str) and (not value.isdecimal() or int(value) in self._timestamp_heuristic_range):
35
+ return True
36
+ if isinstance(value, int) and value in self._timestamp_heuristic_range:
37
+ return True
38
+ return False
39
+
40
+ def _matches_format(self, value: Any, format: str) -> bool:
41
+ """Checks if the value matches the format"""
42
+ try:
43
+ self._parser.parse(value, format)
44
+ return True
45
+ except ValueError:
46
+ return False
47
+
48
+ def _initialize(self, record: AirbyteRecordMessage):
49
+ """Initializes the internal state of the class"""
50
+ self._datetime_candidates = {}
51
+ for field_name, field_value in record.data.items():
52
+ if not self._can_be_datetime(field_value):
53
+ continue
54
+ for format in self._formats:
55
+ if self._matches_format(field_value, format):
56
+ self._datetime_candidates[field_name] = format
57
+ break
58
+
59
+ def _validate(self, record: AirbyteRecordMessage):
60
+ """Validates that the record is consistent with the inferred datetime formats"""
61
+ for candidate_field_name in list(self._datetime_candidates.keys()):
62
+ candidate_field_format = self._datetime_candidates[candidate_field_name]
63
+ current_value = record.data.get(candidate_field_name, None)
64
+ if (
65
+ current_value is None
66
+ or not self._can_be_datetime(current_value)
67
+ or not self._matches_format(current_value, candidate_field_format)
68
+ ):
69
+ self._datetime_candidates.pop(candidate_field_name)
70
+
71
+ def accumulate(self, record: AirbyteRecordMessage):
72
+ """Analyzes the record and updates the internal state of candidate datetime fields"""
73
+ self._initialize(record) if self._datetime_candidates is None else self._validate(record)
74
+
75
+ def get_inferred_datetime_formats(self) -> Dict[str, str]:
76
+ """
77
+ Returns the candidate datetime fields as a dict - the keys are the field names and the values are the inferred datetime formats.
78
+ For these fields the format was consistent across all visited records.
79
+ """
80
+ return self._datetime_candidates or {}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.40.2
3
+ Version: 0.40.4
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -1,32 +1,32 @@
1
1
  airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
2
- airbyte_cdk/config_observation.py,sha256=TSA2ulzRCZGmA1AK8hOJGkzayjCHvAIglHwM8vI1uuU,3295
2
+ airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
3
3
  airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
4
- airbyte_cdk/entrypoint.py,sha256=cTYsCExsiKAmkhhWQLnmbe-o96mQnK1kriSr5Qm8Ntc,7968
4
+ airbyte_cdk/entrypoint.py,sha256=xQ7jLhElMl-Nl1aWHnlaPbCaVv6UNFuspBUo9w7glbU,8803
5
5
  airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
6
6
  airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
7
7
  airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
9
9
  airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=q8mqQjNqpvHZgwVbNuvSe19o4Aw6MQTuhA2URmdz0K0,5443
10
10
  airbyte_cdk/connector_builder/main.py,sha256=jn2gqaYAvd6uDoFe0oVhnY23grm5sL-jfIX6kGvhVxk,2994
11
- airbyte_cdk/connector_builder/message_grouper.py,sha256=uJGOBhinvbisgAa-bQN3XE2L2xFTeVeykLwDCRYcxgc,12110
12
- airbyte_cdk/connector_builder/models.py,sha256=yW_j91B-3FYNTNbWjR2ZVYTXBHlskT55uxdAqg7FhAE,1221
11
+ airbyte_cdk/connector_builder/message_grouper.py,sha256=dGU85tsOvHkAoQD2lNHA_ibqdr9MNiGlt60nOCuA6yI,12502
12
+ airbyte_cdk/connector_builder/models.py,sha256=jL2SJIWJTLCbBqobw5Qo8WGS0aN-K9TRmfSpDHM5vYc,1277
13
13
  airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
14
14
  airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
15
15
  airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
16
16
  airbyte_cdk/models/airbyte_protocol.py,sha256=wKXV_4sCzmUyPndiW7HWAj_A6EDRJyk9cA88xvXGQN0,117
17
17
  airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
18
18
  airbyte_cdk/sources/__init__.py,sha256=4j6fLtoRCjcZnojpise4EMmQtV1RepBxoGTBgpz80JA,218
19
- airbyte_cdk/sources/abstract_source.py,sha256=svXe29SUHKA6WJwKlQlyKkZax2ybaG0wSvIFzaibl24,17262
19
+ airbyte_cdk/sources/abstract_source.py,sha256=IpHvPKhYvv36b-krP9vn1wowrfi9iZdqcxDGbl2-jVE,17743
20
20
  airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
21
21
  airbyte_cdk/sources/connector_state_manager.py,sha256=_R-2QnMGimKL0t5aV4f6P1dgd--TB3abY5Seg1xddXk,10469
22
22
  airbyte_cdk/sources/source.py,sha256=N3vHZzdUsBETFsql-YpO-LcgjolT_jcnAuHBhGD6Hqk,4278
23
23
  airbyte_cdk/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
24
24
  airbyte_cdk/sources/declarative/create_partial.py,sha256=sUJOwD8hBzW4pxw2XhYlSTMgl-WMc5WpP5Oq_jo3fHw,3371
25
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=-Kt09XCMs61gEphShtPTMGrqVAamr4cml03_YjDuTLQ,74196
25
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=O_U5vwYhXP19mkWhjJgRJCTHAPwf6xeOEbNDccUb_wg,78273
26
26
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=U2As9PDKmcWDgbsWUo-RetJ9fxQOBlwntWZ0NOgs5Ac,1453
27
27
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=0iZSpypxt8bhO3Lmf3BpGRTO7Fp0Q2GI8m8xyJJUjeM,6580
28
28
  airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
29
- airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=vTbRNM8D9P_ChOu1GNvtNRt-PM2L9N5Y0pNRyfVFuZg,9759
29
+ airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=lJCJAHmKPssqnDLAnxU6fuwlNAVm_Ut1EQLTxDy8h1I,10018
30
30
  airbyte_cdk/sources/declarative/types.py,sha256=b_RJpL9TyAgxJIRYZx5BxpC39p-WccHKxbAqxWrn9oE,482
31
31
  airbyte_cdk/sources/declarative/yaml_declarative_source.py,sha256=I9Bs9RDsFT8JNiJWRDjKYhqwvv4pqzgYZtF5hVuTDqI,1684
32
32
  airbyte_cdk/sources/declarative/auth/__init__.py,sha256=DyQdO5mdKGsttWdEUqxb6WVgD7zTcvpJz-Oet_VNeBg,201
@@ -60,14 +60,14 @@ airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=dyIM-bzh54
60
60
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=Dc0F87nElWsz_Ikj938eQ9uqZvyqgFhZ8Dqf_-hvndc,4800
61
61
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=V6WGKJ9cXX1rjuM4bK3Cs9xEryMlkY2U3FMsSBhrgC8,3098
62
62
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=EiYnzwCHZV7EYqMJqcy6xKSeHvTKZBsQndjbEwmiTW4,93
63
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=7XeAhmGHuNRYK97KwxvbrNXS1Az95O7gOMM3uRlGjrU,50104
63
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=5RI0o8lTGBt4emHSF-Xsk0FE4LccnMCTBk-LK0PdiMA,53677
64
64
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
65
65
  airbyte_cdk/sources/declarative/parsers/class_types_registry.py,sha256=bK4a74opm6WHyV7HqOVws6GE5Z7cLNc5MaTha69abIQ,6086
66
66
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=y7_G5mM07zxT5YG975kdC2PAja-Uc83pYp8WrV3GNdo,522
67
67
  airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py,sha256=W8BcK4KOg4ifNXgsdeIoV4oneHjXBKcPHEZHIC4r-hM,3801
68
68
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=H23H3nURCxsvjq66Gn9naffp0HJ1fU03wLFu-5F0AhQ,7701
69
69
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=6ukHx0bBrCJm9rek1l_MEfS3U_gdJcM4pJRyifJEOp0,6412
70
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=LJWolEib5v7IpruG_LHgDYCRID1LbdEEtygMng6EiSw,47818
70
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=u0dVRPddHEL6OYxFIT3Z6TC-8MiFeKKKw0gPMMpn37Y,48075
71
71
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=27sOWhw2LBQs62HchURakHQ2M_mtnOatNgU6q8RUtpU,476
72
72
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=fa6VtTwSoIkDI3SBoRtVx79opVtJX80_gU9bt31lspc,4785
73
73
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=Fi3ocNZZoYkr0uvRgwoVSqne6enxRvi8DOHrASVK2PQ,1851
@@ -125,6 +125,8 @@ airbyte_cdk/sources/declarative/transformations/transformation.py,sha256=q_FDDDY
125
125
  airbyte_cdk/sources/deprecated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
126
126
  airbyte_cdk/sources/deprecated/base_source.py,sha256=5FafxPLDAh2KNBnKxxlC8QvPRgDYUjmT5OzqEKz8kjI,3524
127
127
  airbyte_cdk/sources/deprecated/client.py,sha256=6G2xQZJ2BzMJa-Sq4VdvVM9Dwu11rEEwqHGhmXAb3h4,3560
128
+ airbyte_cdk/sources/message/__init__.py,sha256=WIXPTh8Sx18LhEV6sZ_aI5CDqfjc0b2KPBeKZKwbs6I,193
129
+ airbyte_cdk/sources/message/repository.py,sha256=VMGusWUdxtz6WGs0Lv-ut-CbVR222HdjhHvde1shg3E,1187
128
130
  airbyte_cdk/sources/singer/__init__.py,sha256=D3zQSiWT0B9t0kKE4JPZjrcDnP2YnFNJ3dfYqSaxo9w,246
129
131
  airbyte_cdk/sources/singer/singer_helpers.py,sha256=q1LmgjFxSnN-dobMy7nikUwcK-9FvW5QQfgTqiclbAE,15649
130
132
  airbyte_cdk/sources/singer/source.py,sha256=3YY8UTOXmctvMVUnYmIegmL3_IxF55iGP_bc_s2MZdY,8530
@@ -141,9 +143,9 @@ airbyte_cdk/sources/streams/http/auth/core.py,sha256=_s9wewvvIcOgYjhHGDj_YHApnF5
141
143
  airbyte_cdk/sources/streams/http/auth/oauth.py,sha256=zchPWN1utNg02F93f5b4UFI5OXYo8-QhocbsXhLdG4U,4135
142
144
  airbyte_cdk/sources/streams/http/auth/token.py,sha256=oU1ul0LsGsPGN_vOJOKw1xX2y_XWULRxjqXu7Rivcr8,1940
143
145
  airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
144
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=dw9mmIOf05NDqKzzvRA3tXKjx1LvVGm1tPt8TQhf5Y8,5339
146
+ airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=CRfMunZdowlUyaAgIG76NwUo2xISTjs1AJBbJMaZ-p0,5464
145
147
  airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=T0hVF2cBXGgIfrCslvTC1uNm9rNbYjENNl2Cb3mXuSY,961
146
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=Y94eU0Ad8tEnCurW-_vrrAnbbCc0Mo5W38aigr85oEw,11005
148
+ airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=uK5n1oImmFkJJCRukvNNSxCwRcXPV0BAkeOmr5ep6LY,11531
147
149
  airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=hDti8DlF_R5YYX95hg9BPogYtG-KUYtOifrFDv_L3Hk,2456
148
150
  airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
149
151
  airbyte_cdk/sources/streams/utils/stream_helper.py,sha256=8n1e27DqELN_KRXuWW1IE3ZjE9zvhclNqsKtOosI_Ds,1480
@@ -156,6 +158,7 @@ airbyte_cdk/sources/utils/schema_models.py,sha256=m1vOqNkkVYGblc492wKo11Zm5FK9F0
156
158
  airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyuYF875bSQg,9493
157
159
  airbyte_cdk/utils/__init__.py,sha256=kFLcs2P-tbPyeVOJS9rOv1jZdnSpjG24ro0CHgt_CIk,215
158
160
  airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2kDfQK_upWzBbI,2894
161
+ airbyte_cdk/utils/datetime_format_inferrer.py,sha256=1z5lGq_DI9LFrT68ftlJSqndS6i-Rs1PX7T_RBtOJpA,3443
159
162
  airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
160
163
  airbyte_cdk/utils/schema_inferrer.py,sha256=j0us_mEMj8PVVzSZfoS1adK7V7a--mSHQozo6xmsiIc,3720
161
164
  airbyte_cdk/utils/stream_status_utils.py,sha256=X1Vy7BhglycjdIWpfKDfwJussNCxYffelKt6Utjx-qY,1005
@@ -163,8 +166,8 @@ airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnT
163
166
  source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
167
  source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
165
168
  unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
166
- unit_tests/connector_builder/test_connector_builder_handler.py,sha256=V9p7AFECaLqSK-iGvu0OqwV6qREQC2BhWo0H4OoiiK4,26895
167
- unit_tests/connector_builder/test_message_grouper.py,sha256=XMVRW45RDTgy1YVzkV-jOXj7Ar2mzgDV8OW2QDzZjYU,28510
169
+ unit_tests/connector_builder/test_connector_builder_handler.py,sha256=0TQn6C_De9mpRMU6lcrcuKWIwAHKw2GVMG8iT6OTBMo,28489
170
+ unit_tests/connector_builder/test_message_grouper.py,sha256=MSj9bQd4MtGsmXP-wPHiq4nODbLyrNT-W2CVpNOs2tE,28116
168
171
  unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
169
172
  unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
170
173
  unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
@@ -172,7 +175,7 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
172
175
  unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
173
176
  unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
174
177
  unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
175
- unit_tests/sources/test_abstract_source.py,sha256=eHZjhfSN-fzqbvdZtGqa5FVwggoFDXxi5SBA1-LQi70,44194
178
+ unit_tests/sources/test_abstract_source.py,sha256=Gn5XJKQlJhxxqawS5T-81BUXmPNUvg3g2UK-kXq-v48,46351
176
179
  unit_tests/sources/test_config.py,sha256=gFXqU_6OjwHXkV4JHMqQUznxmvTWN8nAv0w0-FFpugc,2477
177
180
  unit_tests/sources/test_connector_state_manager.py,sha256=ynFxA63Cxe6t-wMMh9C6ByTlMAuk8W7H2FikDhnUEQ0,24264
178
181
  unit_tests/sources/test_source.py,sha256=eVtU9Zuc9gBsg11Pb5xjDtyU0gVrbYqbZ4RmzPvDw_M,24695
@@ -183,7 +186,7 @@ unit_tests/sources/declarative/test_declarative_stream.py,sha256=3leJnZIYHiFq8XI
183
186
  unit_tests/sources/declarative/test_manifest_declarative_source.py,sha256=GckUc3nepzZkD1UM24woHlYCVZb5DP4IAQC3IeMyZF0,58924
184
187
  unit_tests/sources/declarative/test_yaml_declarative_source.py,sha256=6HhsUFgB7ueN0yOUHWb4gpPYLng5jasxN_plvz3x37g,5097
185
188
  unit_tests/sources/declarative/auth/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
186
- unit_tests/sources/declarative/auth/test_oauth.py,sha256=j-xEUbRPs5jnRAvKCNLKDpEbAZLmXHEy9tSEkYUrYx0,8442
189
+ unit_tests/sources/declarative/auth/test_oauth.py,sha256=WOGs28NVOvb0lIy1ymtQgUEbI8r1Z2fBIY6iWBqCnoE,8514
187
190
  unit_tests/sources/declarative/auth/test_session_token_auth.py,sha256=mxWCm_0AyVI6J1Q5CjogXY-EkXFfWkMZjNtBeb0bOow,6135
188
191
  unit_tests/sources/declarative/auth/test_token_auth.py,sha256=EIaxGFvaUE6vAUW2_tBrds6nTx4qhfYK8ppRwoNXKd0,6162
189
192
  unit_tests/sources/declarative/checks/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
@@ -247,6 +250,8 @@ unit_tests/sources/declarative/schema/source_test/__init__.py,sha256=4Hw-PX1-VgE
247
250
  unit_tests/sources/declarative/states/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
248
251
  unit_tests/sources/declarative/stream_slicers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
249
252
  unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py,sha256=MI1kLtMuC1LKryBzub0KconsrpIVgPOhAtYM4b3qRfA,9507
253
+ unit_tests/sources/message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
254
+ unit_tests/sources/message/test_repository.py,sha256=qgCFpRUZU_Mm2JePtyIX5KheFYXCDj1ODTlo8z-Yz4Y,2234
250
255
  unit_tests/sources/streams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
251
256
  unit_tests/sources/streams/test_availability_strategy.py,sha256=vJrSEk9NwRghu0YsSNoMYHKWzA9UFemwyClpke8Mk2s,2315
252
257
  unit_tests/sources/streams/test_streams_core.py,sha256=YOC7XqWFJ13Z4YuO9Nh4AR4AwpJ-s111vqPplFfpxk4,5059
@@ -256,14 +261,15 @@ unit_tests/sources/streams/http/test_http.py,sha256=H0lGcb0XHuM1R7GC3wAaaxhGoNwi
256
261
  unit_tests/sources/streams/http/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
257
262
  unit_tests/sources/streams/http/auth/test_auth.py,sha256=gdWpJ-cR64qRXmmPOQWhVd4E6ekXyJEIEfJxA0jlDvc,6546
258
263
  unit_tests/sources/streams/http/requests_native_auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
259
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=_BZVsG_LZUXfBmHWTlKIw65eGkdwFSiKRlpjsccj61U,12396
264
+ unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py,sha256=NoTfDSClXFqjbN_zvoleVWO0lDhjR4obWYn5ApQkWnI,14166
260
265
  unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
266
+ unit_tests/utils/test_datetime_format_inferrer.py,sha256=Io2o5flTre9gyI_IDDMpzxOjCz3sr16LO0GRqOD59uk,2946
261
267
  unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg4MNPAG-xhpk,7817
262
268
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
263
269
  unit_tests/utils/test_stream_status_utils.py,sha256=NpV155JMXA6CG-2Zvofa14lItobyh3Onttc59X4m5DI,3382
264
270
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
265
- airbyte_cdk-0.40.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
266
- airbyte_cdk-0.40.2.dist-info/METADATA,sha256=Ts5OITHn2vkPFfNNDy5V3dylvBgEZXRPIKVmuCvijZw,8902
267
- airbyte_cdk-0.40.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
268
- airbyte_cdk-0.40.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
269
- airbyte_cdk-0.40.2.dist-info/RECORD,,
271
+ airbyte_cdk-0.40.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
272
+ airbyte_cdk-0.40.4.dist-info/METADATA,sha256=cg5ce7pYJVInYpcsugnD0J80AjRXnzzaKY11MEHKBeg,8902
273
+ airbyte_cdk-0.40.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
274
+ airbyte_cdk-0.40.4.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
275
+ airbyte_cdk-0.40.4.dist-info/RECORD,,
@@ -176,9 +176,9 @@ def invalid_config_file(tmp_path):
176
176
 
177
177
 
178
178
  def test_handle_resolve_manifest(valid_resolve_manifest_config_file, dummy_catalog):
179
- with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patch:
179
+ with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patched_handle:
180
180
  handle_request(["read", "--config", str(valid_resolve_manifest_config_file), "--catalog", str(dummy_catalog)])
181
- assert patch.call_count == 1
181
+ assert patched_handle.call_count == 1
182
182
 
183
183
 
184
184
  def test_handle_test_read(valid_read_config_file, configured_catalog):
@@ -354,6 +354,7 @@ def test_read():
354
354
  ],
355
355
  test_read_limit_reached=False,
356
356
  inferred_schema=None,
357
+ inferred_datetime_formats=None,
357
358
  latest_config_update={}
358
359
  )
359
360
 
@@ -368,6 +369,7 @@ def test_read():
368
369
  ],
369
370
  "test_read_limit_reached": False,
370
371
  "inferred_schema": None,
372
+ "inferred_datetime_formats": None,
371
373
  "latest_config_update": {}
372
374
  },
373
375
  emitted_at=1,
@@ -382,6 +384,37 @@ def test_read():
382
384
  assert output_record == expected_airbyte_message
383
385
 
384
386
 
387
+ def test_config_update():
388
+ manifest = copy.deepcopy(MANIFEST)
389
+ manifest["definitions"]["retriever"]["requester"]["authenticator"] = {
390
+ "type": "OAuthAuthenticator",
391
+ "token_refresh_endpoint": "https://oauth.endpoint.com/tokens/bearer",
392
+ "client_id": "{{ config['credentials']['client_id'] }}",
393
+ "client_secret": "{{ config['credentials']['client_secret'] }}",
394
+ "refresh_token": "{{ config['credentials']['refresh_token'] }}",
395
+ "refresh_token_updater": {}
396
+ }
397
+ config = copy.deepcopy(TEST_READ_CONFIG)
398
+ config["__injected_declarative_manifest"] = manifest
399
+ config["credentials"] = {
400
+ "client_id": "a client id",
401
+ "client_secret": "a client secret",
402
+ "refresh_token": "a refresh token",
403
+ }
404
+ source = ManifestDeclarativeSource(manifest)
405
+
406
+ refresh_request_response = {
407
+ "access_token": "an updated access token",
408
+ "refresh_token": "an updated refresh token",
409
+ "expires_in": 3600,
410
+ }
411
+ with patch("airbyte_cdk.sources.streams.http.requests_native_auth.SingleUseRefreshTokenOauth2Authenticator._get_refresh_access_token_response", return_value=refresh_request_response):
412
+ output = handle_connector_builder_request(
413
+ source, "test_read", config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG), TestReadLimits()
414
+ )
415
+ assert output.record.data["latest_config_update"]
416
+
417
+
385
418
  @patch("traceback.TracebackException.from_exception")
386
419
  def test_read_returns_error_response(mock_from_exception):
387
420
  class MockManifestDeclarativeSource:
@@ -410,7 +443,8 @@ def test_read_returns_error_response(mock_from_exception):
410
443
  slice_descriptor=None, state=None)],
411
444
  test_read_limit_reached=False,
412
445
  inferred_schema=None,
413
- latest_config_update={})
446
+ inferred_datetime_formats={},
447
+ latest_config_update=None)
414
448
 
415
449
  expected_message = AirbyteMessage(
416
450
  type=MessageType.RECORD,
@@ -94,7 +94,8 @@ def test_get_grouped_messages(mock_entrypoint_read):
94
94
  "body": {"custom": "field"},
95
95
  }
96
96
  response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"}
97
- expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"}
97
+ expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}, "date": {"type": "string"}}, "type": "object"}
98
+ expected_datetime_fields = {"date":"%Y-%m-%d"}
98
99
  expected_pages = [
99
100
  StreamReadPages(
100
101
  request=HttpRequest(
@@ -105,7 +106,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
105
106
  http_method="GET",
106
107
  ),
107
108
  response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
108
- records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}],
109
+ records=[{"name": "Shinobu Kocho", "date": "2023-03-03"}, {"name": "Muichiro Tokito", "date": "2023-03-04"}],
109
110
  ),
110
111
  StreamReadPages(
111
112
  request=HttpRequest(
@@ -116,7 +117,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
116
117
  http_method="GET",
117
118
  ),
118
119
  response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'),
119
- records=[{"name": "Mitsuri Kanroji"}],
120
+ records=[{"name": "Mitsuri Kanroji", "date": "2023-03-05"}],
120
121
  ),
121
122
  ]
122
123
 
@@ -124,11 +125,11 @@ def test_get_grouped_messages(mock_entrypoint_read):
124
125
  [
125
126
  request_log_message(request),
126
127
  response_log_message(response),
127
- record_message("hashiras", {"name": "Shinobu Kocho"}),
128
- record_message("hashiras", {"name": "Muichiro Tokito"}),
128
+ record_message("hashiras", {"name": "Shinobu Kocho", "date": "2023-03-03"}),
129
+ record_message("hashiras", {"name": "Muichiro Tokito", "date": "2023-03-04"}),
129
130
  request_log_message(request),
130
131
  response_log_message(response),
131
- record_message("hashiras", {"name": "Mitsuri Kanroji"}),
132
+ record_message("hashiras", {"name": "Mitsuri Kanroji", "date": "2023-03-05"}),
132
133
  ]
133
134
  ))
134
135
 
@@ -138,6 +139,7 @@ def test_get_grouped_messages(mock_entrypoint_read):
138
139
  )
139
140
 
140
141
  assert actual_response.inferred_schema == expected_schema
142
+ assert actual_response.inferred_datetime_formats == expected_datetime_fields
141
143
 
142
144
  single_slice = actual_response.slices[0]
143
145
  for i, actual_page in enumerate(single_slice.pages):
@@ -552,18 +554,6 @@ def test_given_control_message_then_stream_read_has_config_update(mock_entrypoin
552
554
  assert stream_read.latest_config_update == updated_config
553
555
 
554
556
 
555
- @patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
556
- def test_given_no_control_message_then_use_in_memory_config_change_as_update(mock_entrypoint_read):
557
- mock_source = make_mock_source(mock_entrypoint_read, iter(any_request_and_response_with_a_record()))
558
- connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
559
- full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
560
- stream_read: StreamRead = connector_builder_handler.get_message_groups(
561
- source=mock_source, config=full_config, configured_catalog=create_configured_catalog("hashiras")
562
- )
563
-
564
- assert stream_read.latest_config_update == CONFIG
565
-
566
-
567
557
  @patch('airbyte_cdk.connector_builder.message_grouper.AirbyteEntrypoint.read')
568
558
  def test_given_multiple_control_messages_then_stream_read_has_latest_based_on_emitted_at(mock_entrypoint_read):
569
559
  earliest = 0
@@ -203,7 +203,7 @@ class TestOauth2Authenticator:
203
203
  assert oauth.get_token_expiry_date() == pendulum.parse(next_day)
204
204
 
205
205
 
206
- def mock_request(method, url, data):
207
- if url == "refresh_end":
206
+ def mock_request(method, url, data, headers):
207
+ if url == "refresh_end" and headers == {"Content-Type": "application/json"}:
208
208
  return resp
209
- raise Exception(f"Error while refreshing access token with request: {method}, {url}, {data}")
209
+ raise Exception(f"Error while refreshing access token with request: {method}, {url}, {data}, {headers}")
File without changes
@@ -0,0 +1,65 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import pytest
6
+ from airbyte_cdk.models import (
7
+ AirbyteControlConnectorConfigMessage,
8
+ AirbyteControlMessage,
9
+ AirbyteLogMessage,
10
+ AirbyteMessage,
11
+ Level,
12
+ OrchestratorType,
13
+ Type,
14
+ )
15
+ from airbyte_cdk.sources.message import InMemoryMessageRepository
16
+
17
+ A_CONTROL = AirbyteControlMessage(
18
+ type=OrchestratorType.CONNECTOR_CONFIG,
19
+ emitted_at=0,
20
+ connectorConfig=AirbyteControlConnectorConfigMessage(config={"a config": "value"}),
21
+ )
22
+ ANOTHER_CONTROL = AirbyteControlMessage(
23
+ type=OrchestratorType.CONNECTOR_CONFIG,
24
+ emitted_at=0,
25
+ connectorConfig=AirbyteControlConnectorConfigMessage(config={"another config": "another value"}),
26
+ )
27
+
28
+
29
+ def test_given_no_messages_when_consume_queue_then_return_empty():
30
+ repo = InMemoryMessageRepository()
31
+ messages = list(repo.consume_queue())
32
+ assert messages == []
33
+
34
+
35
+ def test_given_messages_when_consume_queue_then_return_messages():
36
+ repo = InMemoryMessageRepository()
37
+ first_message = AirbyteMessage(type=Type.CONTROL, control=A_CONTROL)
38
+ repo.emit_message(first_message)
39
+ second_message = AirbyteMessage(type=Type.CONTROL, control=ANOTHER_CONTROL)
40
+ repo.emit_message(second_message)
41
+
42
+ messages = repo.consume_queue()
43
+
44
+ assert list(messages) == [first_message, second_message]
45
+
46
+
47
+ def test_given_message_is_consumed_when_consume_queue_then_remove_message_from_queue():
48
+ repo = InMemoryMessageRepository()
49
+ first_message = AirbyteMessage(type=Type.CONTROL, control=A_CONTROL)
50
+ repo.emit_message(first_message)
51
+ second_message = AirbyteMessage(type=Type.CONTROL, control=ANOTHER_CONTROL)
52
+ repo.emit_message(second_message)
53
+
54
+ message_generator = repo.consume_queue()
55
+ consumed_message = next(message_generator)
56
+ assert consumed_message == first_message
57
+
58
+ second_message_generator = repo.consume_queue()
59
+ assert list(second_message_generator) == [second_message]
60
+
61
+
62
+ def test_given_message_is_not_control_message_when_emit_message_then_raise_error():
63
+ repo = InMemoryMessageRepository()
64
+ with pytest.raises(ValueError):
65
+ repo.emit_message(AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="any log message")))
@@ -4,11 +4,13 @@
4
4
 
5
5
  import json
6
6
  import logging
7
+ from unittest.mock import Mock
7
8
 
8
9
  import freezegun
9
10
  import pendulum
10
11
  import pytest
11
12
  import requests
13
+ from airbyte_cdk.models import OrchestratorType, Type
12
14
  from airbyte_cdk.sources.streams.http.requests_native_auth import (
13
15
  BasicHttpAuthenticator,
14
16
  MultipleTokenAuthenticator,
@@ -243,7 +245,7 @@ class TestSingleUseRefreshTokenOauth2Authenticator:
243
245
  ("date_format", "2023-04-04", "YYYY-MM-DD", "2023-04-04T00:00:00+00:00"),
244
246
  ]
245
247
  )
246
- def test_get_access_token(self, test_name, expires_in_value, expiry_date_format, expected_expiry_date, capsys, mocker, connector_config):
248
+ def test_given_no_message_repository_get_access_token(self, test_name, expires_in_value, expiry_date_format, expected_expiry_date, capsys, mocker, connector_config):
247
249
  authenticator = SingleUseRefreshTokenOauth2Authenticator(
248
250
  connector_config,
249
251
  token_refresh_endpoint="foobar",
@@ -270,6 +272,30 @@ class TestSingleUseRefreshTokenOauth2Authenticator:
270
272
  assert not captured.out
271
273
  assert authenticator.access_token == access_token == "new_access_token"
272
274
 
275
+ def test_given_message_repository_when_get_access_token_emit_message(self, mocker, connector_config):
276
+ message_repository = Mock()
277
+ authenticator = SingleUseRefreshTokenOauth2Authenticator(
278
+ connector_config,
279
+ token_refresh_endpoint="foobar",
280
+ client_id=connector_config["credentials"]["client_id"],
281
+ client_secret=connector_config["credentials"]["client_secret"],
282
+ token_expiry_date_format="YYYY-MM-DD",
283
+ message_repository=message_repository,
284
+ )
285
+ authenticator.refresh_access_token = mocker.Mock(return_value=("new_access_token", "2023-04-04", "new_refresh_token"))
286
+ authenticator.token_has_expired = mocker.Mock(return_value=True)
287
+
288
+ authenticator.get_access_token()
289
+
290
+ emitted_message = message_repository.emit_message.call_args_list[0].args[0]
291
+ assert emitted_message.type == Type.CONTROL
292
+ assert emitted_message.control.type == OrchestratorType.CONNECTOR_CONFIG
293
+ assert emitted_message.control.connectorConfig.config["credentials"]["access_token"] == "new_access_token"
294
+ assert emitted_message.control.connectorConfig.config["credentials"]["refresh_token"] == "new_refresh_token"
295
+ assert emitted_message.control.connectorConfig.config["credentials"]["token_expiry_date"] == "2023-04-04T00:00:00+00:00"
296
+ assert emitted_message.control.connectorConfig.config["credentials"]["client_id"] == "my_client_id"
297
+ assert emitted_message.control.connectorConfig.config["credentials"]["client_secret"] == "my_client_secret"
298
+
273
299
  def test_refresh_access_token(self, mocker, connector_config):
274
300
  authenticator = SingleUseRefreshTokenOauth2Authenticator(
275
301
  connector_config,
@@ -288,7 +314,7 @@ class TestSingleUseRefreshTokenOauth2Authenticator:
288
314
  assert authenticator.refresh_access_token() == ("new_access_token", "42", "new_refresh_token")
289
315
 
290
316
 
291
def mock_request(method, url, data, headers):
    """Fake request function for the token refresh call.

    Returns ``resp`` (defined outside this function) only when the request
    targets the ``refresh_end`` URL with a JSON content-type header; any other
    request raises so that an unexpected call fails the test loudly.
    """
    expected_headers = {"Content-Type": "application/json"}
    if url != "refresh_end" or headers != expected_headers:
        raise Exception(f"Error while refreshing access token with request: {method}, {url}, {data}, {headers}")
    return resp
@@ -7,7 +7,7 @@ import datetime
7
7
  import logging
8
8
  from collections import defaultdict
9
9
  from typing import Any, Callable, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
10
- from unittest.mock import call
10
+ from unittest.mock import Mock, call
11
11
 
12
12
  import pytest
13
13
  from airbyte_cdk.models import (
@@ -37,9 +37,11 @@ from airbyte_cdk.models import Type
37
37
  from airbyte_cdk.models import Type as MessageType
38
38
  from airbyte_cdk.sources import AbstractSource
39
39
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
40
+ from airbyte_cdk.sources.message import MessageRepository
40
41
  from airbyte_cdk.sources.streams import IncrementalMixin, Stream
41
42
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
42
43
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
44
+ from pytest import fixture
43
45
 
44
46
  logger = logging.getLogger("airbyte")
45
47
 
@@ -50,10 +52,12 @@ class MockSource(AbstractSource):
50
52
  check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None,
51
53
  streams: List[Stream] = None,
52
54
  per_stream: bool = True,
55
+ message_repository: MessageRepository = None
53
56
  ):
54
57
  self._streams = streams
55
58
  self.check_lambda = check_lambda
56
59
  self.per_stream = per_stream
60
+ self._message_repository = message_repository
57
61
 
58
62
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
59
63
  if self.check_lambda:
@@ -69,6 +73,10 @@ class MockSource(AbstractSource):
69
73
  def per_stream_state_enabled(self) -> bool:
70
74
  return self.per_stream
71
75
 
76
@property
def message_repository(self):
    """Expose the message repository stored on this source as ``_message_repository``."""
    return self._message_repository
79
+
72
80
 
73
81
  class StreamNoStateMethod(Stream):
74
82
  name = "managers"
@@ -97,6 +105,16 @@ class MockStreamOverridesStateMethod(Stream, IncrementalMixin):
97
105
  self._cursor_value = value.get(self.cursor_field, self.start_date)
98
106
 
99
107
 
108
# Sentinel message that the mocked repository hands back when its queue is consumed.
MESSAGE_FROM_REPOSITORY = Mock()


@fixture
def message_repository():
    """Provide a ``MessageRepository`` mock whose ``consume_queue`` returns the sentinel message."""
    repository = Mock(spec=MessageRepository)
    # A fresh list is built on every fixture call, so tests cannot share state through it.
    repository.consume_queue.return_value = [MESSAGE_FROM_REPOSITORY]
    return repository
116
+
117
+
100
118
  def test_successful_check():
101
119
  """Tests that if a source returns TRUE for the connection check the appropriate connectionStatus success message is returned"""
102
120
  expected = AirbyteConnectionStatus(status=Status.SUCCEEDED)
@@ -221,6 +239,34 @@ def test_read_nonexistent_stream_raises_exception(mocker):
221
239
  list(src.read(logger, {}, catalog))
222
240
 
223
241
 
242
def test_read_stream_emits_repository_message_before_record(mocker, message_repository):
    """Check that a message queued in the repository is emitted exactly once and ahead of every record.

    ``read_records`` is patched to return two records, and ``consume_queue``
    returns the repository message on its first call and nothing on the second.
    """
    stream = MockStream(name="my_stream")
    mocker.patch.object(MockStream, "get_json_schema", return_value={})
    mocker.patch.object(MockStream, "read_records", side_effect=[[{"a record": "a value"}, {"another record": "another value"}]])
    message_repository.consume_queue.side_effect = [[MESSAGE_FROM_REPOSITORY], []]

    source = MockSource(streams=[stream], message_repository=message_repository)
    catalog = ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])

    messages = list(source.read(logger, {}, catalog))

    assert messages.count(MESSAGE_FROM_REPOSITORY) == 1
    repository_message_index = messages.index(MESSAGE_FROM_REPOSITORY)
    record_messages = [message for message in messages if message.type == Type.RECORD]
    # all() is True for an empty iterable, so make sure there is at least one
    # record; otherwise the ordering check below would pass without checking anything.
    assert record_messages
    assert all(repository_message_index < messages.index(record) for record in record_messages)
255
+
256
+
257
def test_read_stream_emits_repository_message_on_error(mocker, message_repository):
    """Check that the repository message is still emitted when reading the stream raises.

    ``read_records`` is patched to raise ``RuntimeError``; the messages yielded
    by ``source.read`` before the error propagates must include the repository
    message.
    """
    stream = MockStream(name="my_stream")
    mocker.patch.object(MockStream, "get_json_schema", return_value={})
    mocker.patch.object(MockStream, "read_records", side_effect=RuntimeError("error"))
    message_repository.consume_queue.return_value = [MESSAGE_FROM_REPOSITORY]

    source = MockSource(streams=[stream], message_repository=message_repository)
    catalog = ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])

    # Collect messages one at a time: list(...) would discard everything yielded
    # before the exception, and any statement placed after the raising call inside
    # the pytest.raises block is never executed.
    messages = []
    with pytest.raises(RuntimeError):
        for message in source.read(logger, {}, catalog):
            messages.append(message)

    assert MESSAGE_FROM_REPOSITORY in messages
268
+
269
+
224
270
  def test_read_stream_with_error_gets_display_message(mocker):
225
271
  stream = MockStream(name="my_stream")
226
272
 
@@ -0,0 +1,53 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from typing import Dict, List
6
+
7
+ import pytest
8
+ from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage
9
+ from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
10
+
11
# Fixed emitted_at value used for every record message built in this module.
NOW = 1234567


@pytest.mark.parametrize(
    "test_name,input_records,expected_candidate_fields",
    [
        ("empty", [], {}),
        ("simple_match", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("timestamp_match_integer", [{"d": 1686058051}], {"d": "%s"}),
        ("timestamp_match_string", [{"d": "1686058051"}], {"d": "%s"}),
        ("timestamp_no_match_integer", [{"d": 99}], {}),
        ("timestamp_no_match_string", [{"d": "99999999999999999999"}], {}),
        ("simple_no_match", [{"d": "20220203"}], {}),
        ("multiple_match", [{"d": "2022-02-03", "e": "2022-02-03"}], {"d": "%Y-%m-%d", "e": "%Y-%m-%d"}),
        (
            "multiple_no_match",
            [{"d": "20220203", "r": "ccc", "e": {"something-else": "2023-03-03"}, "s": ["2023-03-03"], "x": False, "y": 123}],
            {},
        ),
        ("format_1", [{"d": "2022-02-03"}], {"d": "%Y-%m-%d"}),
        ("format_2", [{"d": "2022-02-03 12:34:56"}], {"d": "%Y-%m-%d %H:%M:%S"}),
        ("format_3", [{"d": "2022-02-03 12:34:56.123456+00:00"}], {"d": "%Y-%m-%d %H:%M:%S.%f+00:00"}),
        ("format_4", [{"d": "2022-02-03T12:34:56.123456+0000"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_4 2", [{"d": "2022-02-03T12:34:56.000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        # Label made unique: it used to repeat "format_4 2".
        ("format_4 3", [{"d": "2022-02-03T12:34:56.000000Z"}], {"d": "%Y-%m-%dT%H:%M:%S.%f%z"}),
        ("format_6", [{"d": "03/02/2022 12:34"}], {"d": "%d/%m/%Y %H:%M"}),
        ("format_7", [{"d": "2022-02"}], {"d": "%Y-%m"}),
        ("format_8", [{"d": "03-02-2022"}], {"d": "%d-%m-%Y"}),
        ("limit_down", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "2022-02-03", "x": "another thing"}], {"d": "%Y-%m-%d"}),
        ("limit_down all", [{"d": "2022-02-03", "x": "2022-02-03"}, {"d": "also another thing", "x": "another thing"}], {}),
        ("limit_down empty", [{"d": "2022-02-03", "x": "2022-02-03"}, {}], {}),
        ("limit_down unsupported type", [{"d": "2022-02-03"}, {"d": False}], {}),
        ("limit_down complex type", [{"d": "2022-02-03"}, {"d": {"date": "2022-03-03"}}], {}),
        ("limit_down different format", [{"d": "2022-02-03"}, {"d": 1686058051}], {}),
        # Label made unique: it used to repeat "limit_down different format".
        ("limit_down different format 2", [{"d": "2022-02-03"}, {"d": "2022-02-03T12:34:56.000000Z"}], {}),
        ("no scope expand", [{}, {"d": "2022-02-03"}], {}),
    ],
)
def test_schema_inferrer(test_name, input_records: List, expected_candidate_fields: Dict[str, str]):
    """Accumulate the records into a fresh ``DatetimeFormatInferrer`` and compare the inferred formats.

    Args:
        test_name: Human-readable label of the case; not used by the test body.
        input_records: Record payloads fed to the inferrer, in order.
        expected_candidate_fields: Expected mapping of field name to inferred datetime format.
    """
    inferrer = DatetimeFormatInferrer()
    for record in input_records:
        inferrer.accumulate(AirbyteRecordMessage(stream="abc", data=record, emitted_at=NOW))
    assert inferrer.get_inferred_datetime_formats() == expected_candidate_fields