airbyte-cdk 0.55.3__py3-none-any.whl → 0.55.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
airbyte_cdk/entrypoint.py CHANGED
@@ -106,6 +106,9 @@ class AirbyteEntrypoint(object):
             raw_config = self.source.read_config(parsed_args.config)
             config = self.source.configure(raw_config, temp_dir)
 
+            yield from [
+                self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)
+            ]
             if cmd == "check":
                 yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
             elif cmd == "discover":
airbyte_cdk/sources/abstract_source.py CHANGED
@@ -19,7 +19,7 @@ from airbyte_cdk.models import (
 )
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
-from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.source import Source
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.core import StreamData
@@ -31,6 +31,8 @@ from airbyte_cdk.utils.event_timing import create_timer
 from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 
+_default_message_repository = InMemoryMessageRepository()
+
 
 class AbstractSource(Source, ABC):
     """
@@ -269,4 +271,4 @@ class AbstractSource(Source, ABC):
 
     @property
     def message_repository(self) -> Union[None, MessageRepository]:
-        return None
+        return _default_message_repository
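
With this change, every `AbstractSource` gets an in-memory message repository by default instead of `None`, so messages a connector queues are actually drained by the entrypoint change above. Below is a minimal, illustrative sketch of that queue-and-drain flow; it assumes the `emit_message` and `consume_queue` methods of the `MessageRepository` interface and is not code from the package.

    # Illustrative only: queue a message on the default repository and drain it roughly
    # the way the entrypoint would. Assumes emit_message()/consume_queue() exist.
    from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, Type
    from airbyte_cdk.sources.message import InMemoryMessageRepository

    repo = InMemoryMessageRepository()
    repo.emit_message(
        AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="queued by the source"))
    )

    for queued_message in repo.consume_queue():
        print(queued_message.json(exclude_unset=True))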
airbyte_cdk/sources/file_based/config/file_based_stream_config.py CHANGED
@@ -46,7 +46,9 @@ class FileBasedStreamConfig(BaseModel):
         description="The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.",
     )
     primary_key: Optional[str] = Field(
-        title="Primary Key", description="The column or columns (for a composite key) that serves as the unique identifier of a record."
+        title="Primary Key",
+        description="The column or columns (for a composite key) that serves as the unique identifier of a record.",
+        airbyte_hidden=True,  # Users can create/modify primary keys in the connection configuration so we shouldn't duplicate it here.
     )
     days_to_sync_if_history_is_full: int = Field(
         title="Days To Sync If History Is Full",
airbyte_cdk/test/entrypoint_wrapper.py ADDED
@@ -0,0 +1,116 @@
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+
+"""
+The AirbyteEntrypoint is important because it is a service layer that orchestrates how we execute commands from the
+[common interface](https://docs.airbyte.com/understanding-airbyte/airbyte-protocol#common-interface) through the source Python
+implementation. There is some logic about which messages we send to the platform and when, which is relevant for integration testing. Other
+than that, there are integration points that are awkward to integrate with from Python code:
+* Sources communicate with the platform using stdout. The implication is that the source could just print every message instead of
+returning things from source.<method> or using the message repository. WARNING: As part of integration testing, we will not support
+messages that are simply printed. The reason is that capturing stdout relies on overriding sys.stdout (see
+https://docs.python.org/3/library/contextlib.html#contextlib.redirect_stdout) which clashes with how pytest captures logs and brings
+considerations for multithreaded applications. If code you work with uses `print` statements, please migrate to
+source.message_repository to emit those messages.
+* The entrypoint interface relies on files being written to the file system.
+"""
+
+import json
+import logging
+import tempfile
+from io import StringIO
+from pathlib import Path
+from typing import Any, List, Mapping, Optional, Union
+
+from airbyte_cdk.entrypoint import AirbyteEntrypoint
+from airbyte_cdk.logger import AirbyteLogFormatter
+from airbyte_cdk.sources import Source
+from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog, Level, TraceType, Type
+from pydantic.error_wrappers import ValidationError
+
+
+class EntrypointOutput:
+    def __init__(self, messages: List[str]):
+        try:
+            self._messages = [self._parse_message(message) for message in messages]
+        except ValidationError as exception:
+            raise ValueError("All messages are expected to be AirbyteMessage") from exception
+
+    @staticmethod
+    def _parse_message(message: str) -> AirbyteMessage:
+        try:
+            return AirbyteMessage.parse_obj(json.loads(message))
+        except (json.JSONDecodeError, ValidationError):
+            # The platform assumes that logs that are not of AirbyteMessage format are log messages
+            return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=message))
+
+    @property
+    def records_and_state_messages(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.RECORD, Type.STATE])
+
+    @property
+    def records(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.RECORD])
+
+    @property
+    def state_messages(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.STATE])
+
+    @property
+    def logs(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.LOG])
+
+    @property
+    def trace_messages(self) -> List[AirbyteMessage]:
+        return self._get_message_by_types([Type.TRACE])
+
+    @property
+    def analytics_messages(self) -> List[AirbyteMessage]:
+        return [message for message in self._get_message_by_types([Type.TRACE]) if message.trace.type == TraceType.ANALYTICS]
+
+    def _get_message_by_types(self, message_types: List[Type]) -> List[AirbyteMessage]:
+        return [message for message in self._messages if message.type in message_types]
+
+
+def read(source: Source, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: Optional[Any] = None) -> EntrypointOutput:
+    """
+    config and state must be json serializable
+    """
+    log_capture_buffer = StringIO()
+    stream_handler = logging.StreamHandler(log_capture_buffer)
+    stream_handler.setLevel(logging.INFO)
+    stream_handler.setFormatter(AirbyteLogFormatter())
+    parent_logger = logging.getLogger("")
+    parent_logger.addHandler(stream_handler)
+
+    with tempfile.TemporaryDirectory() as tmp_directory:
+        tmp_directory_path = Path(tmp_directory)
+        args = [
+            "read",
+            "--config",
+            make_file(tmp_directory_path / "config.json", config),
+            "--catalog",
+            make_file(tmp_directory_path / "catalog.json", catalog.json()),
+        ]
+        if state:
+            args.extend(
+                [
+                    "--state",
+                    make_file(tmp_directory_path / "state.json", state),
+                ]
+            )
+        source_entrypoint = AirbyteEntrypoint(source)
+        parsed_args = source_entrypoint.parse_args(args)
+        messages = list(source_entrypoint.run(parsed_args))
+        captured_logs = log_capture_buffer.getvalue().split("\n")[:-1]
+
+        parent_logger.removeHandler(stream_handler)
+
+        return EntrypointOutput(messages + captured_logs)
+
+
+def make_file(path: Path, file_contents: Optional[Union[str, Mapping[str, Any], List[Mapping[str, Any]]]]) -> str:
+    if isinstance(file_contents, str):
+        path.write_text(file_contents)
+    else:
+        path.write_text(json.dumps(file_contents))
+    return str(path)
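
This new `airbyte_cdk.test.entrypoint_wrapper.read` helper is what the rewritten file-based tests below switch to. A sketch of how a connector test might call it (`MySource`, the config dict, and the stream name are placeholders, not part of the package):

    from airbyte_cdk.test.entrypoint_wrapper import read
    from airbyte_protocol.models import ConfiguredAirbyteCatalog

    def test_read_emits_records():
        catalog = ConfiguredAirbyteCatalog.parse_obj(
            {
                "streams": [
                    {
                        "stream": {"name": "my_stream", "json_schema": {}, "supported_sync_modes": ["full_refresh"]},
                        "sync_mode": "full_refresh",
                        "destination_sync_mode": "append",
                    }
                ]
            }
        )
        output = read(MySource(), config={"api_key": "..."}, catalog=catalog)  # MySource is a hypothetical connector
        assert output.records  # EntrypointOutput groups records, state, logs and traces for assertions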
airbyte_cdk-0.55.3.dist-info/METADATA → airbyte_cdk-0.55.5.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 0.55.3
+Version: 0.55.5
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://github.com/airbytehq/airbyte
 Author: Airbyte
airbyte_cdk-0.55.3.dist-info/RECORD → airbyte_cdk-0.55.5.dist-info/RECORD RENAMED
@@ -1,7 +1,7 @@
 airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
 airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
 airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
-airbyte_cdk/entrypoint.py,sha256=upN2KOPYlyalns49ZmZVuAnEMVDxXb33lv5NUEnxUiY,12992
+airbyte_cdk/entrypoint.py,sha256=uX3MawH1qukzxFjdR1AFynG0l5vLof9X40m_AYTrP_8,13180
 airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
 airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
 airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
 airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
 airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
 airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
-airbyte_cdk/sources/abstract_source.py,sha256=_cUJPfxNrYIq7upQ17wQeIchC7jZYWDB-6XfDNA-yCM,12934
+airbyte_cdk/sources/abstract_source.py,sha256=tlXXwCRBisbOu7pA2NYtiRolhHSCaKlXcET6dny5V9o,13043
 airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
 airbyte_cdk/sources/connector_state_manager.py,sha256=wsmUgII398MazCTKxwLBLzeiU6Z-tMTrKX882EEy-YE,10904
 airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
@@ -164,7 +164,7 @@ airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
 airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
 airbyte_cdk/sources/file_based/config/avro_format.py,sha256=lQSEq5JZY0M5y9mW93R4EjrIb8brYXUgrXCY-6EMHww,711
 airbyte_cdk/sources/file_based/config/csv_format.py,sha256=L3JEgb91yrCob1oYrGl0088QEWblkOsRfDmMfWRQ0bg,7482
-airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=zzD59S8e9aeYfVU5Dh_i7dpFe2qi_thQyLpQYNQcnm0,4008
+airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=lE1_Uzcfoly63xIoNigRFT0urOKjf0FQ7fLMKH2AX-s,4153
 airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=fAPzZnoghGgHjaDvx6Qo68C8j54mBxo1NTdpwSI0VZo,374
 airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=8GTDTQyvS7pWLVG0LWirHVE1snHd0Au5R4Ym33-ezEg,736
 airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=8yc0TMhlf1bcJc34IXzYkYHQ5HpGN4rt1f3zKSiCeYk,934
@@ -237,6 +237,7 @@ airbyte_cdk/sources/utils/slice_logger.py,sha256=YeWSoZeOsQp9oZK7mick2J8KFdiY726
 airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyuYF875bSQg,9493
 airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
 airbyte_cdk/test/__init__.py,sha256=f_XdkOg4_63QT2k3BbKY34209lppwgw-svzfZstQEq4,199
+airbyte_cdk/test/entrypoint_wrapper.py,sha256=Bu67gvYwlnejmRc4fifkn4wKNOof9TuNfinYRdUUWVE,5155
 airbyte_cdk/test/http/__init__.py,sha256=Gh2u6y10KXfvYqhC3Mm811U-b9FaoQMBxSW4hT2JJ6I,302
 airbyte_cdk/test/http/matcher.py,sha256=zkcnnteguIYLI8nrZ1egttw94OfQPgY1LUsPjjytYgY,1117
 airbyte_cdk/test/http/mocker.py,sha256=pKEEHnf_OsOv-czx1jyyftfvCy4fDgtl6k7SweytOrU,4472
@@ -356,9 +357,9 @@ unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slic
 unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unit_tests/sources/file_based/helpers.py,sha256=MZTwaWtX0a6TPbFcUMP-EgqBunK2wpoElgApCEE1bN4,2659
 unit_tests/sources/file_based/in_memory_files_source.py,sha256=r2yD6-_ABXG7_PIyTq4ACN21sHyg3g-Hd9dIgxfDQUk,8235
-unit_tests/sources/file_based/test_file_based_scenarios.py,sha256=9xVFaFFHjnzZziVmoVmLTULdxANt_zSrwVgANAVytl4,11564
+unit_tests/sources/file_based/test_file_based_scenarios.py,sha256=CMlhlccQzVq65cqu8V_Kpo1Eafumv-2OPCRwWNBnbGk,11418
 unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=P6yTp7tbPfREzi5SXg4SSSql5nxiRV571YdOmwb_SzY,9219
-unit_tests/sources/file_based/test_scenarios.py,sha256=2-9pqnfva3RDRyODy0xcK6mxrP_mHH5vLrmBhqgZO8o,9703
+unit_tests/sources/file_based/test_scenarios.py,sha256=4xRRBZhqe__QWP8Fd5oXCHDI-0qKjrhypPAWGvmWwDM,7888
 unit_tests/sources/file_based/test_schema_helpers.py,sha256=IYIDdLRK41RkSG_ZW2cagAt9krV4QLbkzu6r7vPx9Js,12047
 unit_tests/sources/file_based/availability_strategy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py,sha256=V9knz_3FHzk8k6R1JaiZgZjHHFNRa8pmH2PGqhRJ2UA,4432
@@ -377,12 +378,12 @@ unit_tests/sources/file_based/file_types/test_unstructured_parser.py,sha256=W6jb
 unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=oeQUmCV7d2aTShreYc-PvVb4cWqLSsVwHfg-lcKjzPs,30554
 unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=0xkt21ASTnTAMP0RYJEsF3yMGsNN7wWOoG_tmzL9PYw,6750
-unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=PAvWvqUbmGyTts8xWSSx1lrMeXys1hOJ5b2jzfZaqNk,109990
+unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=1u2oZse5VDVJVZBM4ZeLzP-6E7BDjhYCxHvWhQGFWBY,110050
 unit_tests/sources/file_based/scenarios/file_based_source_builder.py,sha256=wgb7l5VohcEvZT82ZpJcjINSrjuJtzJZS4zuZjdKpJ4,3874
 unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=B7YE2IbvgTH_v7DYQEuv7yn2IG15aKUvJ_7dA4d3Cg4,69413
 unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=LsOf-tpjWNuwskPcgAMhMpQQ3iaHaD3PjPmt2M2zSzo,31839
 unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=MGgLCqkTJb8uNEwYZY3zbVVDZRSBKSmf2s8VMuYse_I,26549
-unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=feSSViayuoxTquoRhMUg4Lcui7dtwWHQ1Fe5y9igWSo,8728
+unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=GeuAOg7eC6Wgx648-OoAcaQh19AdFLgeLTpN2vmyTaw,8894
 unit_tests/sources/file_based/scenarios/unstructured_scenarios.py,sha256=3jeDxyLh6LgwK0wMhU884fqSXG47H3AWvIQDD15jO6c,64973
 unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=7CxIaqZxAGSPs4AtcKZ9FLVVYQPsS__uXi9wnQMKn3U,28322
 unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=Try0knJN5wfoGNO38QGoLGIcqSceSAQsUWO42CusNYI,33005
@@ -411,7 +412,7 @@ unit_tests/sources/streams/concurrent/scenarios/__init__.py,sha256=4Hw-PX1-VgESL
 unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py,sha256=x77AQf8_O4dQ2aF1o800CzI0hOEyU8ayxoNdSOvxkhM,10495
 unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=OD_9R5fHt5Nf7hH8m28-UDoZJkY8iUBJLI73kd-u2BE,5794
 unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=v0yP5MRGYJAb9bp2yXnp5yUmYKJ6aAKjHcNHigL_ONY,13981
-unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=sQpvIJa5-Iv03KZfC2sP2zB8XSPCZAjLpUMpNBOA-xM,3897
+unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=Z_4-ClsxBupmN7Pbl8lF9bkSA9wnjLtrgA9WR_8VRi8,3757
 unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py,sha256=KqCLsXB_9rV4hNdSPrNynK3G-UIsipqsZT6X0Z-iM5E,13175
 unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py,sha256=aMtEOpCkxH-v2BBOYj4xABzPKcDYh_jieGfaIp4hy9w,5727
 unit_tests/sources/streams/concurrent/scenarios/utils.py,sha256=Pl1F4asW8AvV6bV5W3Qg21GiLqfdMT_rOt1CsFA0aVM,1953
@@ -436,8 +437,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
 unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
 unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
 unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
-airbyte_cdk-0.55.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-0.55.3.dist-info/METADATA,sha256=NMmUmx3R4MAMSEd1mEkbL2kl7wiOK2Yta6L6AnvVxMU,11983
-airbyte_cdk-0.55.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-airbyte_cdk-0.55.3.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
-airbyte_cdk-0.55.3.dist-info/RECORD,,
+airbyte_cdk-0.55.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-0.55.5.dist-info/METADATA,sha256=UGBscoQLxD10YK6vK2sziE_7IkU_JY03BSihYZcEJv0,11983
+airbyte_cdk-0.55.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+airbyte_cdk-0.55.5.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
+airbyte_cdk-0.55.5.dist-info/RECORD,,
unit_tests/sources/file_based/scenarios/csv_scenarios.py CHANGED
@@ -100,6 +100,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
                     "title": "Primary Key",
                     "description": "The column or columns (for a composite key) that serves as the unique identifier of a record.",
                     "type": "string",
+                    "airbyte_hidden": True,
                 },
                 "days_to_sync_if_history_is_full": {
                     "title": "Days To Sync If History Is Full",
unit_tests/sources/file_based/scenarios/scenario_builder.py CHANGED
@@ -81,9 +81,13 @@ class TestScenario(Generic[SourceType]):
         for stream in self.source.streams(self.config):
             catalog["streams"].append(
                 {
-                    "stream": stream.name,
+                    "stream": {
+                        "name": stream.name,
+                        "json_schema": {},
+                        "supported_sync_modes": [sync_mode.value],
+                    },
                     "sync_mode": sync_mode.value,
-                    "destination_sync_mode": "append",
+                    "destination_sync_mode": "append"
                 }
             )
 
unit_tests/sources/file_based/test_file_based_scenarios.py CHANGED
@@ -8,7 +8,6 @@ import pytest
 from _pytest.capture import CaptureFixture
 from airbyte_cdk.sources.abstract_source import AbstractSource
 from freezegun import freeze_time
-from pytest import LogCaptureFixture
 from unit_tests.sources.file_based.scenarios.avro_scenarios import (
     avro_all_types_scenario,
     avro_file_with_double_as_number_scenario,
@@ -248,10 +247,8 @@ def test_file_based_discover(capsys: CaptureFixture[str], tmp_path: PosixPath, s
 
 @pytest.mark.parametrize("scenario", read_scenarios, ids=[s.name for s in read_scenarios])
 @freeze_time("2023-06-09T00:00:00Z")
-def test_file_based_read(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
-    verify_read(capsys, caplog, tmp_path, scenario)
+def test_file_based_read(scenario: TestScenario[AbstractSource]) -> None:
+    verify_read(scenario)
 
 
 @pytest.mark.parametrize("scenario", spec_scenarios, ids=[c.name for c in spec_scenarios])
unit_tests/sources/file_based/test_scenarios.py CHANGED
@@ -11,11 +11,12 @@ import pytest
 from _pytest.capture import CaptureFixture
 from _pytest.reports import ExceptionInfo
 from airbyte_cdk.entrypoint import launch
-from airbyte_cdk.logger import AirbyteLogFormatter
 from airbyte_cdk.models import AirbyteAnalyticsTraceMessage, SyncMode
 from airbyte_cdk.sources import AbstractSource
+from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput
+from airbyte_cdk.test.entrypoint_wrapper import read as entrypoint_read
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
-from pytest import LogCaptureFixture
+from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog
 from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
 
 
@@ -37,58 +38,51 @@ def verify_discover(capsys: CaptureFixture[str], tmp_path: PosixPath, scenario:
     _verify_expected_logs(logs, discover_logs)
 
 
-def verify_read(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
-    caplog.handler.setFormatter(AirbyteLogFormatter())
+def verify_read(scenario: TestScenario[AbstractSource]) -> None:
     if scenario.incremental_scenario_config:
-        run_test_read_incremental(capsys, caplog, tmp_path, scenario)
+        run_test_read_incremental(scenario)
     else:
-        run_test_read_full_refresh(capsys, caplog, tmp_path, scenario)
+        run_test_read_full_refresh(scenario)
 
 
-def run_test_read_full_refresh(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
+def run_test_read_full_refresh(scenario: TestScenario[AbstractSource]) -> None:
     expected_exc, expected_msg = scenario.expected_read_error
     if expected_exc:
         with pytest.raises(expected_exc) as exc:  # noqa
-            read(capsys, caplog, tmp_path, scenario)
+            read(scenario)
         if expected_msg:
             assert expected_msg in get_error_message_from_exc(exc)
     else:
-        output = read(capsys, caplog, tmp_path, scenario)
+        output = read(scenario)
         _verify_read_output(output, scenario)
 
 
-def run_test_read_incremental(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> None:
+def run_test_read_incremental(scenario: TestScenario[AbstractSource]) -> None:
     expected_exc, expected_msg = scenario.expected_read_error
     if expected_exc:
         with pytest.raises(expected_exc):
-            read_with_state(capsys, caplog, tmp_path, scenario)
+            read_with_state(scenario)
     else:
-        output = read_with_state(capsys, caplog, tmp_path, scenario)
+        output = read_with_state(scenario)
         _verify_read_output(output, scenario)
 
 
-def _verify_read_output(output: Dict[str, Any], scenario: TestScenario[AbstractSource]) -> None:
-    records, logs = output["records"], output["logs"]
-    logs = [log for log in logs if log.get("level") in scenario.log_levels]
+def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[AbstractSource]) -> None:
+    records, log_messages = output.records_and_state_messages, output.logs
+    logs = [message.log for message in log_messages if message.log.level.value in scenario.log_levels]
     expected_records = scenario.expected_records
     assert len(records) == len(expected_records)
     for actual, expected in zip(records, expected_records):
-        if "record" in actual:
-            assert len(actual["record"]["data"]) == len(expected["data"])
-            for key, value in actual["record"]["data"].items():
+        if actual.record:
+            assert len(actual.record.data) == len(expected["data"])
+            for key, value in actual.record.data.items():
                 if isinstance(value, float):
                     assert math.isclose(value, expected["data"][key], abs_tol=1e-04)
                 else:
                     assert value == expected["data"][key]
-            assert actual["record"]["stream"] == expected["stream"]
-        elif "state" in actual:
-            assert actual["state"]["data"] == expected
+            assert actual.record.stream == expected["stream"]
+        elif actual.state:
+            assert actual.state.data == expected
 
     if scenario.expected_logs:
         read_logs = scenario.expected_logs.get("read")
@@ -96,25 +90,25 @@ def _verify_read_output(output: Dict[str, Any], scenario: TestScenario[AbstractS
         _verify_expected_logs(logs, read_logs)
 
     if scenario.expected_analytics:
-        analytics = output["analytics"]
+        analytics = output.analytics_messages
 
         _verify_analytics(analytics, scenario.expected_analytics)
 
 
-def _verify_analytics(analytics: List[Dict[str, Any]], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]]) -> None:
+def _verify_analytics(analytics: List[AirbyteMessage], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]]) -> None:
     if expected_analytics:
         for actual, expected in zip(analytics, expected_analytics):
-            actual_type, actual_value = actual["type"], actual["value"]
+            actual_type, actual_value = actual.trace.analytics.type, actual.trace.analytics.value
             expected_type = expected.type
             expected_value = expected.value
             assert actual_type == expected_type
             assert actual_value == expected_value
 
 
-def _verify_expected_logs(logs: List[Dict[str, Any]], expected_logs: Optional[List[Mapping[str, Any]]]) -> None:
+def _verify_expected_logs(logs: List[AirbyteLogMessage], expected_logs: Optional[List[Mapping[str, Any]]]) -> None:
    if expected_logs:
        for actual, expected in zip(logs, expected_logs):
-            actual_level, actual_message = actual["level"], actual["message"]
+            actual_level, actual_message = actual.level.value, actual.message
            expected_level = expected["level"]
            expected_message = expected["message"]
            assert actual_level == expected_level
@@ -172,55 +166,21 @@ def discover(capsys: CaptureFixture[str], tmp_path: PosixPath, scenario: TestSce
     }
 
 
-def read(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> Dict[str, Any]:
-    with caplog.handler.stream as logger_stream:
-        launch(
-            scenario.source,
-            [
-                "read",
-                "--config",
-                make_file(tmp_path / "config.json", scenario.config),
-                "--catalog",
-                make_file(tmp_path / "catalog.json", scenario.configured_catalog(SyncMode.full_refresh)),
-            ],
-        )
-        captured = capsys.readouterr().out.splitlines() + logger_stream.getvalue().split("\n")[:-1]
-
-        return {
-            "records": [msg for msg in (json.loads(line) for line in captured) if msg["type"] == "RECORD"],
-            "logs": [msg["log"] for msg in (json.loads(line) for line in captured) if msg["type"] == "LOG"],
-            "analytics": [
-                msg["trace"]["analytics"]
-                for msg in (json.loads(line) for line in captured)
-                if msg["type"] == "TRACE" and msg["trace"]["type"] == "ANALYTICS"
-            ],
-        }
-
-
-def read_with_state(
-    capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
-) -> Dict[str, List[Any]]:
-    launch(
+def read(scenario: TestScenario[AbstractSource]) -> EntrypointOutput:
+    return entrypoint_read(
         scenario.source,
-        [
-            "read",
-            "--config",
-            make_file(tmp_path / "config.json", scenario.config),
-            "--catalog",
-            make_file(tmp_path / "catalog.json", scenario.configured_catalog(SyncMode.incremental)),
-            "--state",
-            make_file(tmp_path / "state.json", scenario.input_state()),
-        ],
+        scenario.config,
+        ConfiguredAirbyteCatalog.parse_obj(scenario.configured_catalog(SyncMode.full_refresh)),
+    )
+
+
+def read_with_state(scenario: TestScenario[AbstractSource]) -> EntrypointOutput:
+    return entrypoint_read(
+        scenario.source,
+        scenario.config,
+        ConfiguredAirbyteCatalog.parse_obj(scenario.configured_catalog(SyncMode.incremental)),
+        scenario.input_state(),
     )
-    captured = capsys.readouterr()
-    logs = caplog.records
-    return {
-        "records": [msg for msg in (json.loads(line) for line in captured.out.splitlines()) if msg["type"] in ("RECORD", "STATE")],
-        "logs": [msg["log"] for msg in (json.loads(line) for line in captured.out.splitlines()) if msg["type"] == "LOG"]
-        + [{"level": log.levelname, "message": log.message} for log in logs],
-    }
 
 
 def make_file(path: Path, file_contents: Optional[Union[Mapping[str, Any], List[Mapping[str, Any]]]]) -> str:
unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py CHANGED
@@ -7,7 +7,6 @@ from pathlib import PosixPath
 import pytest
 from _pytest.capture import CaptureFixture
 from freezegun import freeze_time
-from pytest import LogCaptureFixture
 from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
 from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
 from unit_tests.sources.streams.concurrent.scenarios.incremental_scenarios import (
@@ -68,8 +67,8 @@ scenarios = [
 
 @pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios])
 @freeze_time("2023-06-09T00:00:00Z")
-def test_concurrent_read(capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario) -> None:
-    verify_read(capsys, caplog, tmp_path, scenario)
+def test_concurrent_read(scenario: TestScenario) -> None:
+    verify_read(scenario)
 
 
 @pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios])