airbyte-cdk 0.55.3__py3-none-any.whl → 0.55.5__py3-none-any.whl

airbyte_cdk/entrypoint.py CHANGED
@@ -106,6 +106,9 @@ class AirbyteEntrypoint(object):
             raw_config = self.source.read_config(parsed_args.config)
             config = self.source.configure(raw_config, temp_dir)
 
+            yield from [
+                self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)
+            ]
             if cmd == "check":
                 yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
             elif cmd == "discover":
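
The new block flushes whatever the source queued in its message repository before the per-command output is emitted, so queued messages are not lost. A hedged sketch of the producer side of that contract, assuming the emit_message/consume_queue API that InMemoryMessageRepository exposes at this CDK version:

from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, Type
from airbyte_cdk.sources.message import InMemoryMessageRepository

repository = InMemoryMessageRepository()

# A connector queues a message instead of print()-ing it to stdout directly...
repository.emit_message(
    AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="source configured"))
)

# ...and the entrypoint later drains the queue and serializes each message, as the hunk above does.
for queued_message in repository.consume_queue():
    print(queued_message.json(exclude_unset=True))
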
airbyte_cdk/sources/abstract_source.py CHANGED
@@ -19,7 +19,7 @@ from airbyte_cdk.models import (
 )
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
-from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.source import Source
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.core import StreamData
@@ -31,6 +31,8 @@ from airbyte_cdk.utils.event_timing import create_timer
 from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 
+_default_message_repository = InMemoryMessageRepository()
+
 
 class AbstractSource(Source, ABC):
     """
@@ -269,4 +271,4 @@ class AbstractSource(Source, ABC):
 
     @property
    def message_repository(self) -> Union[None, MessageRepository]:
-        return None
+        return _default_message_repository
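
With this default in place, every source built on AbstractSource shares one module-level InMemoryMessageRepository unless it overrides the property. A hedged sketch of a source that keeps its own instance instead of the shared default (MyHttpSource and its stubbed methods are illustrative only, not CDK code):

from typing import Any, List, Mapping, Optional, Tuple, Union

from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
from airbyte_cdk.sources.streams import Stream


class MyHttpSource(AbstractSource):
    """Illustrative source that owns a private repository rather than the module-level default."""

    def __init__(self) -> None:
        self._repository = InMemoryMessageRepository()

    @property
    def message_repository(self) -> Union[None, MessageRepository]:
        return self._repository

    def check_connection(self, logger: Any, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
        return True, None  # stubbed out for the sketch

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        return []  # stubbed out for the sketch
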
airbyte_cdk/sources/file_based/config/file_based_stream_config.py CHANGED
@@ -46,7 +46,9 @@ class FileBasedStreamConfig(BaseModel):
         description="The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.",
     )
     primary_key: Optional[str] = Field(
-        title="Primary Key", description="The column or columns (for a composite key) that serves as the unique identifier of a record."
+        title="Primary Key",
+        description="The column or columns (for a composite key) that serves as the unique identifier of a record.",
+        airbyte_hidden=True,  # Users can create/modify primary keys in the connection configuration so we shouldn't duplicate it here.
     )
     days_to_sync_if_history_is_full: int = Field(
         title="Days To Sync If History Is Full",
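
For background, this works because these config models are pydantic v1 models, and pydantic v1 copies unknown keyword arguments passed to Field into the generated JSON schema; that is how airbyte_hidden reaches the connector spec the platform renders. A minimal illustrative sketch (ExampleConfig is not the real FileBasedStreamConfig):

from typing import Optional

from pydantic import BaseModel, Field


class ExampleConfig(BaseModel):
    """Illustrative model showing how an extra Field kwarg lands in the schema."""

    primary_key: Optional[str] = Field(
        None,
        title="Primary Key",
        airbyte_hidden=True,  # extra kwarg, carried verbatim into the JSON schema
    )


print(ExampleConfig.schema()["properties"]["primary_key"])
# expected to include 'airbyte_hidden': True alongside 'title' and 'type'
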
airbyte_cdk/test/entrypoint_wrapper.py ADDED
@@ -0,0 +1,116 @@
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+
+"""
+The AirbyteEntrypoint is important because it is a service layer that orchestrate how we execute commands from the
+[common interface](https://docs.airbyte.com/understanding-airbyte/airbyte-protocol#common-interface) through the source Python
+implementation. There is some logic about which message we send to the platform and when which is relevant for integration testing. Other
+than that, there are integrations point that are annoying to integrate with using Python code:
+* Sources communicate with the platform using stdout. The implication is that the source could just print every message instead of
+returning things to source.<method> or to using the message repository. WARNING: As part of integration testing, we will not support
+messages that are simply printed. The reason is that capturing stdout relies on overriding sys.stdout (see
+https://docs.python.org/3/library/contextlib.html#contextlib.redirect_stdout) which clashes with how pytest captures logs and brings
+considerations for multithreaded applications. If code you work with uses `print` statements, please migrate to
+source.message_repository to emit those messages
+* The entrypoint interface relies on file being written on the file system
+"""
+
+import json
+import logging
+import tempfile
+from io import StringIO
+from pathlib import Path
+from typing import Any, List, Mapping, Optional, Union
+
+from airbyte_cdk.entrypoint import AirbyteEntrypoint
+from airbyte_cdk.logger import AirbyteLogFormatter
+from airbyte_cdk.sources import Source
+from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog, Level, TraceType, Type
+from pydantic.error_wrappers import ValidationError
+
+
+class EntrypointOutput:
+    def __init__(self, messages: List[str]):
+        try:
+            self._messages = [self._parse_message(message) for message in messages]
+        except ValidationError as exception:
+            raise ValueError("All messages are expected to be AirbyteMessage") from exception
+
+    @staticmethod
+    def _parse_message(message: str) -> AirbyteMessage:
+        try:
+            return AirbyteMessage.parse_obj(json.loads(message))
+        except (json.JSONDecodeError, ValidationError):
+            # The platform assumes that logs that are not of AirbyteMessage format are log messages
+            return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=message))
+
+    @property
+    def records_and_state_messages(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.RECORD, Type.STATE])
+
+    @property
+    def records(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.RECORD])
+
+    @property
+    def state_messages(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.STATE])
+
+    @property
+    def logs(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.LOG])
+
+    @property
+    def trace_messages(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.TRACE])
+
+    @property
+    def analytics_messages(self) -> List[AirbyteMessage]:
+        return [message for message in self._get_message_by_types([Type.TRACE]) if message.trace.type == TraceType.ANALYTICS]
+
+    def _get_message_by_types(self, message_types: List[Type]) -> List[AirbyteMessage]:
+        return [message for message in self._messages if message.type in message_types]
+
+
+def read(source: Source, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: Optional[Any] = None) -> EntrypointOutput:
+    """
+    config and state must be json serializable
+    """
+    log_capture_buffer = StringIO()
+    stream_handler = logging.StreamHandler(log_capture_buffer)
+    stream_handler.setLevel(logging.INFO)
+    stream_handler.setFormatter(AirbyteLogFormatter())
+    parent_logger = logging.getLogger("")
+    parent_logger.addHandler(stream_handler)
+
+    with tempfile.TemporaryDirectory() as tmp_directory:
+        tmp_directory_path = Path(tmp_directory)
+        args = [
+            "read",
+            "--config",
+            make_file(tmp_directory_path / "config.json", config),
+            "--catalog",
+            make_file(tmp_directory_path / "catalog.json", catalog.json()),
+        ]
+        if state:
+            args.extend(
+                [
+                    "--state",
+                    make_file(tmp_directory_path / "state.json", state),
+                ]
+            )
+        source_entrypoint = AirbyteEntrypoint(source)
+        parsed_args = source_entrypoint.parse_args(args)
+        messages = list(source_entrypoint.run(parsed_args))
+        captured_logs = log_capture_buffer.getvalue().split("\n")[:-1]
+
+        parent_logger.removeHandler(stream_handler)
+
+        return EntrypointOutput(messages + captured_logs)
+
+
+def make_file(path: Path, file_contents: Optional[Union[str, Mapping[str, Any], List[Mapping[str, Any]]]]) -> str:
+    if isinstance(file_contents, str):
+        path.write_text(file_contents)
+    else:
+        path.write_text(json.dumps(file_contents))
+    return str(path)
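
To make the intended usage concrete, here is a hedged sketch built only on the wrapper shown above. In a real test you would usually obtain an EntrypointOutput from read(source, config, catalog); the raw lines below stand in for what a source would have written to stdout:

import json

from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput

# Two stdout lines a source might have produced: one RECORD message and one bare print().
raw_lines = [
    json.dumps({"type": "RECORD", "record": {"stream": "users", "data": {"id": 1}, "emitted_at": 1}}),
    "a bare print() call, wrapped into an INFO log message by the parser",
]

output = EntrypointOutput(raw_lines)
assert len(output.records) == 1  # parsed AirbyteMessage objects of type RECORD
assert output.logs[0].log.message.startswith("a bare print()")
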
{airbyte_cdk-0.55.3.dist-info → airbyte_cdk-0.55.5.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 0.55.3
+Version: 0.55.5
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://github.com/airbytehq/airbyte
 Author: Airbyte
{airbyte_cdk-0.55.3.dist-info → airbyte_cdk-0.55.5.dist-info}/RECORD RENAMED
@@ -1,7 +1,7 @@
 airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
 airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
 airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
-airbyte_cdk/entrypoint.py,sha256=upN2KOPYlyalns49ZmZVuAnEMVDxXb33lv5NUEnxUiY,12992
+airbyte_cdk/entrypoint.py,sha256=uX3MawH1qukzxFjdR1AFynG0l5vLof9X40m_AYTrP_8,13180
 airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
 airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
 airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
 airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
 airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
 airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
-airbyte_cdk/sources/abstract_source.py,sha256=_cUJPfxNrYIq7upQ17wQeIchC7jZYWDB-6XfDNA-yCM,12934
+airbyte_cdk/sources/abstract_source.py,sha256=tlXXwCRBisbOu7pA2NYtiRolhHSCaKlXcET6dny5V9o,13043
 airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
 airbyte_cdk/sources/connector_state_manager.py,sha256=wsmUgII398MazCTKxwLBLzeiU6Z-tMTrKX882EEy-YE,10904
 airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
@@ -164,7 +164,7 @@ airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
 airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
 airbyte_cdk/sources/file_based/config/avro_format.py,sha256=lQSEq5JZY0M5y9mW93R4EjrIb8brYXUgrXCY-6EMHww,711
 airbyte_cdk/sources/file_based/config/csv_format.py,sha256=L3JEgb91yrCob1oYrGl0088QEWblkOsRfDmMfWRQ0bg,7482
-airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=zzD59S8e9aeYfVU5Dh_i7dpFe2qi_thQyLpQYNQcnm0,4008
+airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=lE1_Uzcfoly63xIoNigRFT0urOKjf0FQ7fLMKH2AX-s,4153
 airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=fAPzZnoghGgHjaDvx6Qo68C8j54mBxo1NTdpwSI0VZo,374
 airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=8GTDTQyvS7pWLVG0LWirHVE1snHd0Au5R4Ym33-ezEg,736
 airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=8yc0TMhlf1bcJc34IXzYkYHQ5HpGN4rt1f3zKSiCeYk,934
@@ -237,6 +237,7 @@ airbyte_cdk/sources/utils/slice_logger.py,sha256=YeWSoZeOsQp9oZK7mick2J8KFdiY726
 airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyuYF875bSQg,9493
 airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
 airbyte_cdk/test/__init__.py,sha256=f_XdkOg4_63QT2k3BbKY34209lppwgw-svzfZstQEq4,199
+airbyte_cdk/test/entrypoint_wrapper.py,sha256=Bu67gvYwlnejmRc4fifkn4wKNOof9TuNfinYRdUUWVE,5155
 airbyte_cdk/test/http/__init__.py,sha256=Gh2u6y10KXfvYqhC3Mm811U-b9FaoQMBxSW4hT2JJ6I,302
 airbyte_cdk/test/http/matcher.py,sha256=zkcnnteguIYLI8nrZ1egttw94OfQPgY1LUsPjjytYgY,1117
 airbyte_cdk/test/http/mocker.py,sha256=pKEEHnf_OsOv-czx1jyyftfvCy4fDgtl6k7SweytOrU,4472
@@ -356,9 +357,9 @@ unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slic
 unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unit_tests/sources/file_based/helpers.py,sha256=MZTwaWtX0a6TPbFcUMP-EgqBunK2wpoElgApCEE1bN4,2659
 unit_tests/sources/file_based/in_memory_files_source.py,sha256=r2yD6-_ABXG7_PIyTq4ACN21sHyg3g-Hd9dIgxfDQUk,8235
-unit_tests/sources/file_based/test_file_based_scenarios.py,sha256=9xVFaFFHjnzZziVmoVmLTULdxANt_zSrwVgANAVytl4,11564
+unit_tests/sources/file_based/test_file_based_scenarios.py,sha256=CMlhlccQzVq65cqu8V_Kpo1Eafumv-2OPCRwWNBnbGk,11418
 unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=P6yTp7tbPfREzi5SXg4SSSql5nxiRV571YdOmwb_SzY,9219
-unit_tests/sources/file_based/test_scenarios.py,sha256=2-9pqnfva3RDRyODy0xcK6mxrP_mHH5vLrmBhqgZO8o,9703
+unit_tests/sources/file_based/test_scenarios.py,sha256=4xRRBZhqe__QWP8Fd5oXCHDI-0qKjrhypPAWGvmWwDM,7888
 unit_tests/sources/file_based/test_schema_helpers.py,sha256=IYIDdLRK41RkSG_ZW2cagAt9krV4QLbkzu6r7vPx9Js,12047
 unit_tests/sources/file_based/availability_strategy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py,sha256=V9knz_3FHzk8k6R1JaiZgZjHHFNRa8pmH2PGqhRJ2UA,4432
@@ -377,12 +378,12 @@ unit_tests/sources/file_based/file_types/test_unstructured_parser.py,sha256=W6jb
 unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=oeQUmCV7d2aTShreYc-PvVb4cWqLSsVwHfg-lcKjzPs,30554
 unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=0xkt21ASTnTAMP0RYJEsF3yMGsNN7wWOoG_tmzL9PYw,6750
-unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=PAvWvqUbmGyTts8xWSSx1lrMeXys1hOJ5b2jzfZaqNk,109990
+unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=1u2oZse5VDVJVZBM4ZeLzP-6E7BDjhYCxHvWhQGFWBY,110050
 unit_tests/sources/file_based/scenarios/file_based_source_builder.py,sha256=wgb7l5VohcEvZT82ZpJcjINSrjuJtzJZS4zuZjdKpJ4,3874
 unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=B7YE2IbvgTH_v7DYQEuv7yn2IG15aKUvJ_7dA4d3Cg4,69413
 unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=LsOf-tpjWNuwskPcgAMhMpQQ3iaHaD3PjPmt2M2zSzo,31839
 unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=MGgLCqkTJb8uNEwYZY3zbVVDZRSBKSmf2s8VMuYse_I,26549
-unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=feSSViayuoxTquoRhMUg4Lcui7dtwWHQ1Fe5y9igWSo,8728
+unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=GeuAOg7eC6Wgx648-OoAcaQh19AdFLgeLTpN2vmyTaw,8894
 unit_tests/sources/file_based/scenarios/unstructured_scenarios.py,sha256=3jeDxyLh6LgwK0wMhU884fqSXG47H3AWvIQDD15jO6c,64973
 unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=7CxIaqZxAGSPs4AtcKZ9FLVVYQPsS__uXi9wnQMKn3U,28322
 unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=Try0knJN5wfoGNO38QGoLGIcqSceSAQsUWO42CusNYI,33005
@@ -411,7 +412,7 @@ unit_tests/sources/streams/concurrent/scenarios/__init__.py,sha256=4Hw-PX1-VgESL
 unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py,sha256=x77AQf8_O4dQ2aF1o800CzI0hOEyU8ayxoNdSOvxkhM,10495
 unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=OD_9R5fHt5Nf7hH8m28-UDoZJkY8iUBJLI73kd-u2BE,5794
 unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=v0yP5MRGYJAb9bp2yXnp5yUmYKJ6aAKjHcNHigL_ONY,13981
-unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=sQpvIJa5-Iv03KZfC2sP2zB8XSPCZAjLpUMpNBOA-xM,3897
+unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=Z_4-ClsxBupmN7Pbl8lF9bkSA9wnjLtrgA9WR_8VRi8,3757
 unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py,sha256=KqCLsXB_9rV4hNdSPrNynK3G-UIsipqsZT6X0Z-iM5E,13175
 unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py,sha256=aMtEOpCkxH-v2BBOYj4xABzPKcDYh_jieGfaIp4hy9w,5727
 unit_tests/sources/streams/concurrent/scenarios/utils.py,sha256=Pl1F4asW8AvV6bV5W3Qg21GiLqfdMT_rOt1CsFA0aVM,1953
@@ -436,8 +437,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
 unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
 unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
 unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
-airbyte_cdk-0.55.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-0.55.3.dist-info/METADATA,sha256=NMmUmx3R4MAMSEd1mEkbL2kl7wiOK2Yta6L6AnvVxMU,11983
-airbyte_cdk-0.55.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-airbyte_cdk-0.55.3.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
-airbyte_cdk-0.55.3.dist-info/RECORD,,
+airbyte_cdk-0.55.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-0.55.5.dist-info/METADATA,sha256=UGBscoQLxD10YK6vK2sziE_7IkU_JY03BSihYZcEJv0,11983
+airbyte_cdk-0.55.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+airbyte_cdk-0.55.5.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
+airbyte_cdk-0.55.5.dist-info/RECORD,,
unit_tests/sources/file_based/scenarios/csv_scenarios.py CHANGED
@@ -100,6 +100,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
                     "title": "Primary Key",
                     "description": "The column or columns (for a composite key) that serves as the unique identifier of a record.",
                     "type": "string",
+                    "airbyte_hidden": True,
                 },
                 "days_to_sync_if_history_is_full": {
                     "title": "Days To Sync If History Is Full",
unit_tests/sources/file_based/scenarios/scenario_builder.py CHANGED
@@ -81,9 +81,13 @@ class TestScenario(Generic[SourceType]):
         for stream in self.source.streams(self.config):
             catalog["streams"].append(
                 {
-                    "stream": stream.name,
+                    "stream": {
+                        "name": stream.name,
+                        "json_schema": {},
+                        "supported_sync_modes": [sync_mode.value],
+                    },
                     "sync_mode": sync_mode.value,
-                    "destination_sync_mode": "append",
+                    "destination_sync_mode": "append"
                 }
             )
 
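This richer entry matters because the new test helpers (see test_scenarios.py below) parse the built catalog with ConfiguredAirbyteCatalog.parse_obj, and the protocol model expects a full stream object rather than a bare name. A short sketch with an illustrative stream name:

from airbyte_protocol.models import ConfiguredAirbyteCatalog

# Shape produced by TestScenario.configured_catalog after this change; "users" is illustrative.
catalog_dict = {
    "streams": [
        {
            "stream": {
                "name": "users",
                "json_schema": {},
                "supported_sync_modes": ["full_refresh"],
            },
            "sync_mode": "full_refresh",
            "destination_sync_mode": "append",
        }
    ]
}

# With only "stream": "users" (the old shape), this validation step would fail.
catalog = ConfiguredAirbyteCatalog.parse_obj(catalog_dict)
print(catalog.streams[0].stream.name)  # users
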
unit_tests/sources/file_based/test_file_based_scenarios.py CHANGED
@@ -8,7 +8,6 @@ import pytest
 from _pytest.capture import CaptureFixture
 from airbyte_cdk.sources.abstract_source import AbstractSource
 from freezegun import freeze_time
-from pytest import LogCaptureFixture
 from unit_tests.sources.file_based.scenarios.avro_scenarios import (
     avro_all_types_scenario,
     avro_file_with_double_as_number_scenario,
@@ -248,10 +247,8 @@ def test_file_based_discover(capsys: CaptureFixture[str], tmp_path: PosixPath, s
 
 @pytest.mark.parametrize("scenario", read_scenarios, ids=[s.name for s in read_scenarios])
 @freeze_time("2023-06-09T00:00:00Z")
-def test_file_based_read(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
-    verify_read(capsys, caplog, tmp_path, scenario)
+def test_file_based_read(scenario: TestScenario[AbstractSource]) -> None:
+    verify_read(scenario)
 
 
 @pytest.mark.parametrize("scenario", spec_scenarios, ids=[c.name for c in spec_scenarios])
unit_tests/sources/file_based/test_scenarios.py CHANGED
@@ -11,11 +11,12 @@ import pytest
 from _pytest.capture import CaptureFixture
 from _pytest.reports import ExceptionInfo
 from airbyte_cdk.entrypoint import launch
-from airbyte_cdk.logger import AirbyteLogFormatter
 from airbyte_cdk.models import AirbyteAnalyticsTraceMessage, SyncMode
 from airbyte_cdk.sources import AbstractSource
+from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput
+from airbyte_cdk.test.entrypoint_wrapper import read as entrypoint_read
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
-from pytest import LogCaptureFixture
+from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog
 from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
 
 
@@ -37,58 +38,51 @@ def verify_discover(capsys: CaptureFixture[str], tmp_path: PosixPath, scenario:
     _verify_expected_logs(logs, discover_logs)
 
 
-def verify_read(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
-    caplog.handler.setFormatter(AirbyteLogFormatter())
+def verify_read(scenario: TestScenario[AbstractSource]) -> None:
     if scenario.incremental_scenario_config:
-        run_test_read_incremental(capsys, caplog, tmp_path, scenario)
+        run_test_read_incremental(scenario)
     else:
-        run_test_read_full_refresh(capsys, caplog, tmp_path, scenario)
+        run_test_read_full_refresh(scenario)
 
 
-def run_test_read_full_refresh(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
+def run_test_read_full_refresh(scenario: TestScenario[AbstractSource]) -> None:
     expected_exc, expected_msg = scenario.expected_read_error
     if expected_exc:
         with pytest.raises(expected_exc) as exc:  # noqa
-            read(capsys, caplog, tmp_path, scenario)
+            read(scenario)
         if expected_msg:
             assert expected_msg in get_error_message_from_exc(exc)
     else:
-        output = read(capsys, caplog, tmp_path, scenario)
+        output = read(scenario)
         _verify_read_output(output, scenario)
 
 
-def run_test_read_incremental(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
+def run_test_read_incremental(scenario: TestScenario[AbstractSource]) -> None:
     expected_exc, expected_msg = scenario.expected_read_error
     if expected_exc:
         with pytest.raises(expected_exc):
-            read_with_state(capsys, caplog, tmp_path, scenario)
+            read_with_state(scenario)
     else:
-        output = read_with_state(capsys, caplog, tmp_path, scenario)
+        output = read_with_state(scenario)
         _verify_read_output(output, scenario)
 
 
-def _verify_read_output(output: Dict[str, Any], scenario: TestScenario[AbstractSource]) -> None:
-    records, logs = output["records"], output["logs"]
-    logs = [log for log in logs if log.get("level") in scenario.log_levels]
+def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[AbstractSource]) -> None:
+    records, log_messages = output.records_and_state_messages, output.logs
+    logs = [message.log for message in log_messages if message.log.level.value in scenario.log_levels]
     expected_records = scenario.expected_records
     assert len(records) == len(expected_records)
     for actual, expected in zip(records, expected_records):
-        if "record" in actual:
-            assert len(actual["record"]["data"]) == len(expected["data"])
-            for key, value in actual["record"]["data"].items():
+        if actual.record:
+            assert len(actual.record.data) == len(expected["data"])
+            for key, value in actual.record.data.items():
                 if isinstance(value, float):
                     assert math.isclose(value, expected["data"][key], abs_tol=1e-04)
                 else:
                     assert value == expected["data"][key]
-            assert actual["record"]["stream"] == expected["stream"]
-        elif "state" in actual:
-            assert actual["state"]["data"] == expected
+            assert actual.record.stream == expected["stream"]
+        elif actual.state:
+            assert actual.state.data == expected
 
     if scenario.expected_logs:
         read_logs = scenario.expected_logs.get("read")
@@ -96,25 +90,25 @@ def _verify_read_output(output: Dict[str, Any], scenario: TestScenario[AbstractS
         _verify_expected_logs(logs, read_logs)
 
     if scenario.expected_analytics:
-        analytics = output["analytics"]
+        analytics = output.analytics_messages
 
         _verify_analytics(analytics, scenario.expected_analytics)
 
 
-def _verify_analytics(analytics: List[Dict[str, Any]], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]]) -> None:
+def _verify_analytics(analytics: List[AirbyteMessage], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]]) -> None:
     if expected_analytics:
         for actual, expected in zip(analytics, expected_analytics):
-            actual_type, actual_value = actual["type"], actual["value"]
+            actual_type, actual_value = actual.trace.analytics.type, actual.trace.analytics.value
             expected_type = expected.type
             expected_value = expected.value
             assert actual_type == expected_type
             assert actual_value == expected_value
 
 
-def _verify_expected_logs(logs: List[Dict[str, Any]], expected_logs: Optional[List[Mapping[str, Any]]]) -> None:
+def _verify_expected_logs(logs: List[AirbyteLogMessage], expected_logs: Optional[List[Mapping[str, Any]]]) -> None:
     if expected_logs:
         for actual, expected in zip(logs, expected_logs):
-            actual_level, actual_message = actual["level"], actual["message"]
+            actual_level, actual_message = actual.level.value, actual.message
             expected_level = expected["level"]
             expected_message = expected["message"]
             assert actual_level == expected_level
@@ -172,55 +166,21 @@ def discover(capsys: CaptureFixture[str], tmp_path: PosixPath, scenario: TestSce
     }
 
 
-def read(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> Dict[str, Any]:
-    with caplog.handler.stream as logger_stream:
-        launch(
-            scenario.source,
-            [
-                "read",
-                "--config",
-                make_file(tmp_path / "config.json", scenario.config),
-                "--catalog",
-                make_file(tmp_path / "catalog.json", scenario.configured_catalog(SyncMode.full_refresh)),
-            ],
-        )
-        captured = capsys.readouterr().out.splitlines() + logger_stream.getvalue().split("\n")[:-1]
-
-        return {
-            "records": [msg for msg in (json.loads(line) for line in captured) if msg["type"] == "RECORD"],
-            "logs": [msg["log"] for msg in (json.loads(line) for line in captured) if msg["type"] == "LOG"],
-            "analytics": [
-                msg["trace"]["analytics"]
-                for msg in (json.loads(line) for line in captured)
-                if msg["type"] == "TRACE" and msg["trace"]["type"] == "ANALYTICS"
-            ],
-        }
-
-
-def read_with_state(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> Dict[str, List[Any]]:
-    launch(
+def read(scenario: TestScenario[AbstractSource]) -> EntrypointOutput:
+    return entrypoint_read(
         scenario.source,
-        [
-            "read",
-            "--config",
-            make_file(tmp_path / "config.json", scenario.config),
-            "--catalog",
-            make_file(tmp_path / "catalog.json", scenario.configured_catalog(SyncMode.incremental)),
-            "--state",
-            make_file(tmp_path / "state.json", scenario.input_state()),
-        ],
+        scenario.config,
+        ConfiguredAirbyteCatalog.parse_obj(scenario.configured_catalog(SyncMode.full_refresh)),
+    )
+
+
+def read_with_state(scenario: TestScenario[AbstractSource]) -> EntrypointOutput:
+    return entrypoint_read(
+        scenario.source,
+        scenario.config,
+        ConfiguredAirbyteCatalog.parse_obj(scenario.configured_catalog(SyncMode.incremental)),
+        scenario.input_state(),
     )
-    captured = capsys.readouterr()
-    logs = caplog.records
-    return {
-        "records": [msg for msg in (json.loads(line) for line in captured.out.splitlines()) if msg["type"] in ("RECORD", "STATE")],
-        "logs": [msg["log"] for msg in (json.loads(line) for line in captured.out.splitlines()) if msg["type"] == "LOG"]
-        + [{"level": log.levelname, "message": log.message} for log in logs],
-    }
 
 
 def make_file(path: Path, file_contents: Optional[Union[Mapping[str, Any], List[Mapping[str, Any]]]]) -> str:
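
For anyone migrating similar tests, the pattern above replaces dict lookups over captured stdout with attribute access on parsed protocol models. A small hedged sketch of the new filtering style used in _verify_read_output; the messages are illustrative:

from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, Level, Type

messages = [
    AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="read started")),
    AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.WARN, message="slice skipped")),
]

# Old style compared plain dicts: log.get("level") in ("WARN",)
# New style reads the parsed model and unwraps the enum value.
warnings = [message.log for message in messages if message.log.level.value in ("WARN",)]
assert [warning.message for warning in warnings] == ["slice skipped"]
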
unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py CHANGED
@@ -7,7 +7,6 @@ from pathlib import PosixPath
 import pytest
 from _pytest.capture import CaptureFixture
 from freezegun import freeze_time
-from pytest import LogCaptureFixture
 from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
 from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
 from unit_tests.sources.streams.concurrent.scenarios.incremental_scenarios import (
@@ -68,8 +67,8 @@ scenarios = [
 
 @pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios])
 @freeze_time("2023-06-09T00:00:00Z")
-def test_concurrent_read(capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario) -> None:
-    verify_read(capsys, caplog, tmp_path, scenario)
+def test_concurrent_read(scenario: TestScenario) -> None:
+    verify_read(scenario)
 
 
 @pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios])