airbyte-cdk 0.55.3__py3-none-any.whl → 0.55.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/entrypoint.py +3 -0
- airbyte_cdk/sources/abstract_source.py +4 -2
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +3 -1
- airbyte_cdk/test/entrypoint_wrapper.py +116 -0
- {airbyte_cdk-0.55.3.dist-info → airbyte_cdk-0.55.5.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.55.3.dist-info → airbyte_cdk-0.55.5.dist-info}/RECORD +14 -13
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +1 -0
- unit_tests/sources/file_based/scenarios/scenario_builder.py +6 -2
- unit_tests/sources/file_based/test_file_based_scenarios.py +2 -5
- unit_tests/sources/file_based/test_scenarios.py +39 -79
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +2 -3
- {airbyte_cdk-0.55.3.dist-info → airbyte_cdk-0.55.5.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.55.3.dist-info → airbyte_cdk-0.55.5.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.55.3.dist-info → airbyte_cdk-0.55.5.dist-info}/top_level.txt +0 -0
airbyte_cdk/entrypoint.py
CHANGED
@@ -106,6 +106,9 @@ class AirbyteEntrypoint(object):
|
|
106
106
|
raw_config = self.source.read_config(parsed_args.config)
|
107
107
|
config = self.source.configure(raw_config, temp_dir)
|
108
108
|
|
109
|
+
yield from [
|
110
|
+
self.airbyte_message_to_string(queued_message) for queued_message in self._emit_queued_messages(self.source)
|
111
|
+
]
|
109
112
|
if cmd == "check":
|
110
113
|
yield from map(AirbyteEntrypoint.airbyte_message_to_string, self.check(source_spec, config))
|
111
114
|
elif cmd == "discover":
|
@@ -19,7 +19,7 @@ from airbyte_cdk.models import (
|
|
19
19
|
)
|
20
20
|
from airbyte_cdk.models import Type as MessageType
|
21
21
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
22
|
-
from airbyte_cdk.sources.message import MessageRepository
|
22
|
+
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
|
23
23
|
from airbyte_cdk.sources.source import Source
|
24
24
|
from airbyte_cdk.sources.streams import Stream
|
25
25
|
from airbyte_cdk.sources.streams.core import StreamData
|
@@ -31,6 +31,8 @@ from airbyte_cdk.utils.event_timing import create_timer
|
|
31
31
|
from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
|
32
32
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
33
33
|
|
34
|
+
_default_message_repository = InMemoryMessageRepository()
|
35
|
+
|
34
36
|
|
35
37
|
class AbstractSource(Source, ABC):
|
36
38
|
"""
|
@@ -269,4 +271,4 @@ class AbstractSource(Source, ABC):
|
|
269
271
|
|
270
272
|
@property
|
271
273
|
def message_repository(self) -> Union[None, MessageRepository]:
|
272
|
-
return
|
274
|
+
return _default_message_repository
|
@@ -46,7 +46,9 @@ class FileBasedStreamConfig(BaseModel):
|
|
46
46
|
description="The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.",
|
47
47
|
)
|
48
48
|
primary_key: Optional[str] = Field(
|
49
|
-
title="Primary Key",
|
49
|
+
title="Primary Key",
|
50
|
+
description="The column or columns (for a composite key) that serves as the unique identifier of a record.",
|
51
|
+
airbyte_hidden=True, # Users can create/modify primary keys in the connection configuration so we shouldn't duplicate it here.
|
50
52
|
)
|
51
53
|
days_to_sync_if_history_is_full: int = Field(
|
52
54
|
title="Days To Sync If History Is Full",
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
"""
|
4
|
+
The AirbyteEntrypoint is important because it is a service layer that orchestrates how we execute commands from the
|
5
|
+
[common interface](https://docs.airbyte.com/understanding-airbyte/airbyte-protocol#common-interface) through the source Python
|
6
|
+
implementation. There is some logic about which messages we send to the platform and when, which is relevant for integration testing. Other
|
7
|
+
than that, there are integration points that are annoying to integrate with using Python code:
|
8
|
+
* Sources communicate with the platform using stdout. The implication is that the source could just print every message instead of
|
9
|
+
returning things to source.<method> or using the message repository. WARNING: As part of integration testing, we will not support
|
10
|
+
messages that are simply printed. The reason is that capturing stdout relies on overriding sys.stdout (see
|
11
|
+
https://docs.python.org/3/library/contextlib.html#contextlib.redirect_stdout) which clashes with how pytest captures logs and brings
|
12
|
+
considerations for multithreaded applications. If code you work with uses `print` statements, please migrate to
|
13
|
+
source.message_repository to emit those messages
|
14
|
+
* The entrypoint interface relies on files being written to the file system
|
15
|
+
"""
|
16
|
+
|
17
|
+
import json
|
18
|
+
import logging
|
19
|
+
import tempfile
|
20
|
+
from io import StringIO
|
21
|
+
from pathlib import Path
|
22
|
+
from typing import Any, List, Mapping, Optional, Union
|
23
|
+
|
24
|
+
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
25
|
+
from airbyte_cdk.logger import AirbyteLogFormatter
|
26
|
+
from airbyte_cdk.sources import Source
|
27
|
+
from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog, Level, TraceType, Type
|
28
|
+
from pydantic.error_wrappers import ValidationError
|
29
|
+
|
30
|
+
|
31
|
+
class EntrypointOutput:
|
32
|
+
def __init__(self, messages: List[str]):
|
33
|
+
try:
|
34
|
+
self._messages = [self._parse_message(message) for message in messages]
|
35
|
+
except ValidationError as exception:
|
36
|
+
raise ValueError("All messages are expected to be AirbyteMessage") from exception
|
37
|
+
|
38
|
+
@staticmethod
|
39
|
+
def _parse_message(message: str) -> AirbyteMessage:
|
40
|
+
try:
|
41
|
+
return AirbyteMessage.parse_obj(json.loads(message))
|
42
|
+
except (json.JSONDecodeError, ValidationError):
|
43
|
+
# The platform assumes that logs that are not of AirbyteMessage format are log messages
|
44
|
+
return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=message))
|
45
|
+
|
46
|
+
@property
|
47
|
+
def records_and_state_messages(self) -> List[AirbyteMessage]:
|
48
|
+
return self._get_message_by_types([Type.RECORD, Type.STATE])
|
49
|
+
|
50
|
+
@property
|
51
|
+
def records(self) -> List[AirbyteMessage]:
|
52
|
+
return self._get_message_by_types([Type.RECORD])
|
53
|
+
|
54
|
+
@property
|
55
|
+
def state_messages(self) -> List[AirbyteMessage]:
|
56
|
+
return self._get_message_by_types([Type.STATE])
|
57
|
+
|
58
|
+
@property
|
59
|
+
def logs(self) -> List[AirbyteMessage]:
|
60
|
+
return self._get_message_by_types([Type.LOG])
|
61
|
+
|
62
|
+
@property
|
63
|
+
def trace_messages(self) -> List[AirbyteMessage]:
|
64
|
+
return self._get_message_by_types([Type.TRACE])
|
65
|
+
|
66
|
+
@property
|
67
|
+
def analytics_messages(self) -> List[AirbyteMessage]:
|
68
|
+
return [message for message in self._get_message_by_types([Type.TRACE]) if message.trace.type == TraceType.ANALYTICS]
|
69
|
+
|
70
|
+
def _get_message_by_types(self, message_types: List[Type]) -> List[AirbyteMessage]:
|
71
|
+
return [message for message in self._messages if message.type in message_types]
|
72
|
+
|
73
|
+
|
74
|
+
def read(source: Source, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: Optional[Any] = None) -> EntrypointOutput:
|
75
|
+
"""
|
76
|
+
config and state must be json serializable
|
77
|
+
"""
|
78
|
+
log_capture_buffer = StringIO()
|
79
|
+
stream_handler = logging.StreamHandler(log_capture_buffer)
|
80
|
+
stream_handler.setLevel(logging.INFO)
|
81
|
+
stream_handler.setFormatter(AirbyteLogFormatter())
|
82
|
+
parent_logger = logging.getLogger("")
|
83
|
+
parent_logger.addHandler(stream_handler)
|
84
|
+
|
85
|
+
with tempfile.TemporaryDirectory() as tmp_directory:
|
86
|
+
tmp_directory_path = Path(tmp_directory)
|
87
|
+
args = [
|
88
|
+
"read",
|
89
|
+
"--config",
|
90
|
+
make_file(tmp_directory_path / "config.json", config),
|
91
|
+
"--catalog",
|
92
|
+
make_file(tmp_directory_path / "catalog.json", catalog.json()),
|
93
|
+
]
|
94
|
+
if state:
|
95
|
+
args.extend(
|
96
|
+
[
|
97
|
+
"--state",
|
98
|
+
make_file(tmp_directory_path / "state.json", state),
|
99
|
+
]
|
100
|
+
)
|
101
|
+
source_entrypoint = AirbyteEntrypoint(source)
|
102
|
+
parsed_args = source_entrypoint.parse_args(args)
|
103
|
+
messages = list(source_entrypoint.run(parsed_args))
|
104
|
+
captured_logs = log_capture_buffer.getvalue().split("\n")[:-1]
|
105
|
+
|
106
|
+
parent_logger.removeHandler(stream_handler)
|
107
|
+
|
108
|
+
return EntrypointOutput(messages + captured_logs)
|
109
|
+
|
110
|
+
|
111
|
+
def make_file(path: Path, file_contents: Optional[Union[str, Mapping[str, Any], List[Mapping[str, Any]]]]) -> str:
|
112
|
+
if isinstance(file_contents, str):
|
113
|
+
path.write_text(file_contents)
|
114
|
+
else:
|
115
|
+
path.write_text(json.dumps(file_contents))
|
116
|
+
return str(path)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
airbyte_cdk/__init__.py,sha256=OBQWv5rF_QTRpOiP6J8J8oTU-GGrfi18i1PRFpahKks,262
|
2
2
|
airbyte_cdk/config_observation.py,sha256=3kjxv8xTwCnub2_fTWnMPRx0E7vly1BUeyXOSK15Ql4,3610
|
3
3
|
airbyte_cdk/connector.py,sha256=LtTAmBFV1LBUz_fOEbQ_EvBhyUsz8AGOlDsvK8QOOo0,4396
|
4
|
-
airbyte_cdk/entrypoint.py,sha256=
|
4
|
+
airbyte_cdk/entrypoint.py,sha256=uX3MawH1qukzxFjdR1AFynG0l5vLof9X40m_AYTrP_8,13180
|
5
5
|
airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
|
6
6
|
airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
|
7
7
|
airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
|
|
24
24
|
airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
|
25
25
|
airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
|
26
26
|
airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
|
27
|
-
airbyte_cdk/sources/abstract_source.py,sha256=
|
27
|
+
airbyte_cdk/sources/abstract_source.py,sha256=tlXXwCRBisbOu7pA2NYtiRolhHSCaKlXcET6dny5V9o,13043
|
28
28
|
airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
|
29
29
|
airbyte_cdk/sources/connector_state_manager.py,sha256=wsmUgII398MazCTKxwLBLzeiU6Z-tMTrKX882EEy-YE,10904
|
30
30
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
@@ -164,7 +164,7 @@ airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
|
|
164
164
|
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=WrV4sKtJoZ1dK31HK7NdBKlnYHkmu6NqjmEpkVqJ6tQ,4582
|
165
165
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=lQSEq5JZY0M5y9mW93R4EjrIb8brYXUgrXCY-6EMHww,711
|
166
166
|
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=L3JEgb91yrCob1oYrGl0088QEWblkOsRfDmMfWRQ0bg,7482
|
167
|
-
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=
|
167
|
+
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=lE1_Uzcfoly63xIoNigRFT0urOKjf0FQ7fLMKH2AX-s,4153
|
168
168
|
airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=fAPzZnoghGgHjaDvx6Qo68C8j54mBxo1NTdpwSI0VZo,374
|
169
169
|
airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=8GTDTQyvS7pWLVG0LWirHVE1snHd0Au5R4Ym33-ezEg,736
|
170
170
|
airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=8yc0TMhlf1bcJc34IXzYkYHQ5HpGN4rt1f3zKSiCeYk,934
|
@@ -237,6 +237,7 @@ airbyte_cdk/sources/utils/slice_logger.py,sha256=YeWSoZeOsQp9oZK7mick2J8KFdiY726
|
|
237
237
|
airbyte_cdk/sources/utils/transform.py,sha256=4GYmO6bq33HF-a1in0dKQKqUOYI1bWItyuYF875bSQg,9493
|
238
238
|
airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
|
239
239
|
airbyte_cdk/test/__init__.py,sha256=f_XdkOg4_63QT2k3BbKY34209lppwgw-svzfZstQEq4,199
|
240
|
+
airbyte_cdk/test/entrypoint_wrapper.py,sha256=Bu67gvYwlnejmRc4fifkn4wKNOof9TuNfinYRdUUWVE,5155
|
240
241
|
airbyte_cdk/test/http/__init__.py,sha256=Gh2u6y10KXfvYqhC3Mm811U-b9FaoQMBxSW4hT2JJ6I,302
|
241
242
|
airbyte_cdk/test/http/matcher.py,sha256=zkcnnteguIYLI8nrZ1egttw94OfQPgY1LUsPjjytYgY,1117
|
242
243
|
airbyte_cdk/test/http/mocker.py,sha256=pKEEHnf_OsOv-czx1jyyftfvCy4fDgtl6k7SweytOrU,4472
|
@@ -356,9 +357,9 @@ unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slic
|
|
356
357
|
unit_tests/sources/file_based/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
357
358
|
unit_tests/sources/file_based/helpers.py,sha256=MZTwaWtX0a6TPbFcUMP-EgqBunK2wpoElgApCEE1bN4,2659
|
358
359
|
unit_tests/sources/file_based/in_memory_files_source.py,sha256=r2yD6-_ABXG7_PIyTq4ACN21sHyg3g-Hd9dIgxfDQUk,8235
|
359
|
-
unit_tests/sources/file_based/test_file_based_scenarios.py,sha256=
|
360
|
+
unit_tests/sources/file_based/test_file_based_scenarios.py,sha256=CMlhlccQzVq65cqu8V_Kpo1Eafumv-2OPCRwWNBnbGk,11418
|
360
361
|
unit_tests/sources/file_based/test_file_based_stream_reader.py,sha256=P6yTp7tbPfREzi5SXg4SSSql5nxiRV571YdOmwb_SzY,9219
|
361
|
-
unit_tests/sources/file_based/test_scenarios.py,sha256=
|
362
|
+
unit_tests/sources/file_based/test_scenarios.py,sha256=4xRRBZhqe__QWP8Fd5oXCHDI-0qKjrhypPAWGvmWwDM,7888
|
362
363
|
unit_tests/sources/file_based/test_schema_helpers.py,sha256=IYIDdLRK41RkSG_ZW2cagAt9krV4QLbkzu6r7vPx9Js,12047
|
363
364
|
unit_tests/sources/file_based/availability_strategy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
364
365
|
unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py,sha256=V9knz_3FHzk8k6R1JaiZgZjHHFNRa8pmH2PGqhRJ2UA,4432
|
@@ -377,12 +378,12 @@ unit_tests/sources/file_based/file_types/test_unstructured_parser.py,sha256=W6jb
|
|
377
378
|
unit_tests/sources/file_based/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
378
379
|
unit_tests/sources/file_based/scenarios/avro_scenarios.py,sha256=oeQUmCV7d2aTShreYc-PvVb4cWqLSsVwHfg-lcKjzPs,30554
|
379
380
|
unit_tests/sources/file_based/scenarios/check_scenarios.py,sha256=0xkt21ASTnTAMP0RYJEsF3yMGsNN7wWOoG_tmzL9PYw,6750
|
380
|
-
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=
|
381
|
+
unit_tests/sources/file_based/scenarios/csv_scenarios.py,sha256=1u2oZse5VDVJVZBM4ZeLzP-6E7BDjhYCxHvWhQGFWBY,110050
|
381
382
|
unit_tests/sources/file_based/scenarios/file_based_source_builder.py,sha256=wgb7l5VohcEvZT82ZpJcjINSrjuJtzJZS4zuZjdKpJ4,3874
|
382
383
|
unit_tests/sources/file_based/scenarios/incremental_scenarios.py,sha256=B7YE2IbvgTH_v7DYQEuv7yn2IG15aKUvJ_7dA4d3Cg4,69413
|
383
384
|
unit_tests/sources/file_based/scenarios/jsonl_scenarios.py,sha256=LsOf-tpjWNuwskPcgAMhMpQQ3iaHaD3PjPmt2M2zSzo,31839
|
384
385
|
unit_tests/sources/file_based/scenarios/parquet_scenarios.py,sha256=MGgLCqkTJb8uNEwYZY3zbVVDZRSBKSmf2s8VMuYse_I,26549
|
385
|
-
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=
|
386
|
+
unit_tests/sources/file_based/scenarios/scenario_builder.py,sha256=GeuAOg7eC6Wgx648-OoAcaQh19AdFLgeLTpN2vmyTaw,8894
|
386
387
|
unit_tests/sources/file_based/scenarios/unstructured_scenarios.py,sha256=3jeDxyLh6LgwK0wMhU884fqSXG47H3AWvIQDD15jO6c,64973
|
387
388
|
unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py,sha256=7CxIaqZxAGSPs4AtcKZ9FLVVYQPsS__uXi9wnQMKn3U,28322
|
388
389
|
unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py,sha256=Try0knJN5wfoGNO38QGoLGIcqSceSAQsUWO42CusNYI,33005
|
@@ -411,7 +412,7 @@ unit_tests/sources/streams/concurrent/scenarios/__init__.py,sha256=4Hw-PX1-VgESL
|
|
411
412
|
unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py,sha256=x77AQf8_O4dQ2aF1o800CzI0hOEyU8ayxoNdSOvxkhM,10495
|
412
413
|
unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=OD_9R5fHt5Nf7hH8m28-UDoZJkY8iUBJLI73kd-u2BE,5794
|
413
414
|
unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=v0yP5MRGYJAb9bp2yXnp5yUmYKJ6aAKjHcNHigL_ONY,13981
|
414
|
-
unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=
|
415
|
+
unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=Z_4-ClsxBupmN7Pbl8lF9bkSA9wnjLtrgA9WR_8VRi8,3757
|
415
416
|
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py,sha256=KqCLsXB_9rV4hNdSPrNynK3G-UIsipqsZT6X0Z-iM5E,13175
|
416
417
|
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py,sha256=aMtEOpCkxH-v2BBOYj4xABzPKcDYh_jieGfaIp4hy9w,5727
|
417
418
|
unit_tests/sources/streams/concurrent/scenarios/utils.py,sha256=Pl1F4asW8AvV6bV5W3Qg21GiLqfdMT_rOt1CsFA0aVM,1953
|
@@ -436,8 +437,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
436
437
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
437
438
|
unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
|
438
439
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
439
|
-
airbyte_cdk-0.55.
|
440
|
-
airbyte_cdk-0.55.
|
441
|
-
airbyte_cdk-0.55.
|
442
|
-
airbyte_cdk-0.55.
|
443
|
-
airbyte_cdk-0.55.
|
440
|
+
airbyte_cdk-0.55.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
441
|
+
airbyte_cdk-0.55.5.dist-info/METADATA,sha256=UGBscoQLxD10YK6vK2sziE_7IkU_JY03BSihYZcEJv0,11983
|
442
|
+
airbyte_cdk-0.55.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
443
|
+
airbyte_cdk-0.55.5.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
444
|
+
airbyte_cdk-0.55.5.dist-info/RECORD,,
|
@@ -100,6 +100,7 @@ single_csv_scenario: TestScenario[InMemoryFilesSource] = (
|
|
100
100
|
"title": "Primary Key",
|
101
101
|
"description": "The column or columns (for a composite key) that serves as the unique identifier of a record.",
|
102
102
|
"type": "string",
|
103
|
+
"airbyte_hidden": True,
|
103
104
|
},
|
104
105
|
"days_to_sync_if_history_is_full": {
|
105
106
|
"title": "Days To Sync If History Is Full",
|
@@ -81,9 +81,13 @@ class TestScenario(Generic[SourceType]):
|
|
81
81
|
for stream in self.source.streams(self.config):
|
82
82
|
catalog["streams"].append(
|
83
83
|
{
|
84
|
-
"stream":
|
84
|
+
"stream": {
|
85
|
+
"name": stream.name,
|
86
|
+
"json_schema": {},
|
87
|
+
"supported_sync_modes": [sync_mode.value],
|
88
|
+
},
|
85
89
|
"sync_mode": sync_mode.value,
|
86
|
-
"destination_sync_mode": "append"
|
90
|
+
"destination_sync_mode": "append"
|
87
91
|
}
|
88
92
|
)
|
89
93
|
|
@@ -8,7 +8,6 @@ import pytest
|
|
8
8
|
from _pytest.capture import CaptureFixture
|
9
9
|
from airbyte_cdk.sources.abstract_source import AbstractSource
|
10
10
|
from freezegun import freeze_time
|
11
|
-
from pytest import LogCaptureFixture
|
12
11
|
from unit_tests.sources.file_based.scenarios.avro_scenarios import (
|
13
12
|
avro_all_types_scenario,
|
14
13
|
avro_file_with_double_as_number_scenario,
|
@@ -248,10 +247,8 @@ def test_file_based_discover(capsys: CaptureFixture[str], tmp_path: PosixPath, s
|
|
248
247
|
|
249
248
|
@pytest.mark.parametrize("scenario", read_scenarios, ids=[s.name for s in read_scenarios])
|
250
249
|
@freeze_time("2023-06-09T00:00:00Z")
|
251
|
-
def test_file_based_read(
|
252
|
-
|
253
|
-
) -> None:
|
254
|
-
verify_read(capsys, caplog, tmp_path, scenario)
|
250
|
+
def test_file_based_read(scenario: TestScenario[AbstractSource]) -> None:
|
251
|
+
verify_read(scenario)
|
255
252
|
|
256
253
|
|
257
254
|
@pytest.mark.parametrize("scenario", spec_scenarios, ids=[c.name for c in spec_scenarios])
|
@@ -11,11 +11,12 @@ import pytest
|
|
11
11
|
from _pytest.capture import CaptureFixture
|
12
12
|
from _pytest.reports import ExceptionInfo
|
13
13
|
from airbyte_cdk.entrypoint import launch
|
14
|
-
from airbyte_cdk.logger import AirbyteLogFormatter
|
15
14
|
from airbyte_cdk.models import AirbyteAnalyticsTraceMessage, SyncMode
|
16
15
|
from airbyte_cdk.sources import AbstractSource
|
16
|
+
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput
|
17
|
+
from airbyte_cdk.test.entrypoint_wrapper import read as entrypoint_read
|
17
18
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
18
|
-
from
|
19
|
+
from airbyte_protocol.models import AirbyteLogMessage, AirbyteMessage, ConfiguredAirbyteCatalog
|
19
20
|
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
|
20
21
|
|
21
22
|
|
@@ -37,58 +38,51 @@ def verify_discover(capsys: CaptureFixture[str], tmp_path: PosixPath, scenario:
|
|
37
38
|
_verify_expected_logs(logs, discover_logs)
|
38
39
|
|
39
40
|
|
40
|
-
def verify_read(
|
41
|
-
capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
|
42
|
-
) -> None:
|
43
|
-
caplog.handler.setFormatter(AirbyteLogFormatter())
|
41
|
+
def verify_read(scenario: TestScenario[AbstractSource]) -> None:
|
44
42
|
if scenario.incremental_scenario_config:
|
45
|
-
run_test_read_incremental(
|
43
|
+
run_test_read_incremental(scenario)
|
46
44
|
else:
|
47
|
-
run_test_read_full_refresh(
|
45
|
+
run_test_read_full_refresh(scenario)
|
48
46
|
|
49
47
|
|
50
|
-
def run_test_read_full_refresh(
|
51
|
-
capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
|
52
|
-
) -> None:
|
48
|
+
def run_test_read_full_refresh(scenario: TestScenario[AbstractSource]) -> None:
|
53
49
|
expected_exc, expected_msg = scenario.expected_read_error
|
54
50
|
if expected_exc:
|
55
51
|
with pytest.raises(expected_exc) as exc: # noqa
|
56
|
-
read(
|
52
|
+
read(scenario)
|
57
53
|
if expected_msg:
|
58
54
|
assert expected_msg in get_error_message_from_exc(exc)
|
59
55
|
else:
|
60
|
-
output = read(
|
56
|
+
output = read(scenario)
|
61
57
|
_verify_read_output(output, scenario)
|
62
58
|
|
63
59
|
|
64
|
-
def run_test_read_incremental(
|
65
|
-
capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
|
66
|
-
) -> None:
|
60
|
+
def run_test_read_incremental(scenario: TestScenario[AbstractSource]) -> None:
|
67
61
|
expected_exc, expected_msg = scenario.expected_read_error
|
68
62
|
if expected_exc:
|
69
63
|
with pytest.raises(expected_exc):
|
70
|
-
read_with_state(
|
64
|
+
read_with_state(scenario)
|
71
65
|
else:
|
72
|
-
output = read_with_state(
|
66
|
+
output = read_with_state(scenario)
|
73
67
|
_verify_read_output(output, scenario)
|
74
68
|
|
75
69
|
|
76
|
-
def _verify_read_output(output:
|
77
|
-
records,
|
78
|
-
logs = [log for
|
70
|
+
def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[AbstractSource]) -> None:
|
71
|
+
records, log_messages = output.records_and_state_messages, output.logs
|
72
|
+
logs = [message.log for message in log_messages if message.log.level.value in scenario.log_levels]
|
79
73
|
expected_records = scenario.expected_records
|
80
74
|
assert len(records) == len(expected_records)
|
81
75
|
for actual, expected in zip(records, expected_records):
|
82
|
-
if
|
83
|
-
assert len(actual
|
84
|
-
for key, value in actual
|
76
|
+
if actual.record:
|
77
|
+
assert len(actual.record.data) == len(expected["data"])
|
78
|
+
for key, value in actual.record.data.items():
|
85
79
|
if isinstance(value, float):
|
86
80
|
assert math.isclose(value, expected["data"][key], abs_tol=1e-04)
|
87
81
|
else:
|
88
82
|
assert value == expected["data"][key]
|
89
|
-
assert actual
|
90
|
-
elif
|
91
|
-
assert actual
|
83
|
+
assert actual.record.stream == expected["stream"]
|
84
|
+
elif actual.state:
|
85
|
+
assert actual.state.data == expected
|
92
86
|
|
93
87
|
if scenario.expected_logs:
|
94
88
|
read_logs = scenario.expected_logs.get("read")
|
@@ -96,25 +90,25 @@ def _verify_read_output(output: Dict[str, Any], scenario: TestScenario[AbstractS
|
|
96
90
|
_verify_expected_logs(logs, read_logs)
|
97
91
|
|
98
92
|
if scenario.expected_analytics:
|
99
|
-
analytics = output
|
93
|
+
analytics = output.analytics_messages
|
100
94
|
|
101
95
|
_verify_analytics(analytics, scenario.expected_analytics)
|
102
96
|
|
103
97
|
|
104
|
-
def _verify_analytics(analytics: List[
|
98
|
+
def _verify_analytics(analytics: List[AirbyteMessage], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]]) -> None:
|
105
99
|
if expected_analytics:
|
106
100
|
for actual, expected in zip(analytics, expected_analytics):
|
107
|
-
actual_type, actual_value = actual
|
101
|
+
actual_type, actual_value = actual.trace.analytics.type, actual.trace.analytics.value
|
108
102
|
expected_type = expected.type
|
109
103
|
expected_value = expected.value
|
110
104
|
assert actual_type == expected_type
|
111
105
|
assert actual_value == expected_value
|
112
106
|
|
113
107
|
|
114
|
-
def _verify_expected_logs(logs: List[
|
108
|
+
def _verify_expected_logs(logs: List[AirbyteLogMessage], expected_logs: Optional[List[Mapping[str, Any]]]) -> None:
|
115
109
|
if expected_logs:
|
116
110
|
for actual, expected in zip(logs, expected_logs):
|
117
|
-
actual_level, actual_message = actual
|
111
|
+
actual_level, actual_message = actual.level.value, actual.message
|
118
112
|
expected_level = expected["level"]
|
119
113
|
expected_message = expected["message"]
|
120
114
|
assert actual_level == expected_level
|
@@ -172,55 +166,21 @@ def discover(capsys: CaptureFixture[str], tmp_path: PosixPath, scenario: TestSce
|
|
172
166
|
}
|
173
167
|
|
174
168
|
|
175
|
-
def read(
|
176
|
-
|
177
|
-
) -> Dict[str, Any]:
|
178
|
-
with caplog.handler.stream as logger_stream:
|
179
|
-
launch(
|
180
|
-
scenario.source,
|
181
|
-
[
|
182
|
-
"read",
|
183
|
-
"--config",
|
184
|
-
make_file(tmp_path / "config.json", scenario.config),
|
185
|
-
"--catalog",
|
186
|
-
make_file(tmp_path / "catalog.json", scenario.configured_catalog(SyncMode.full_refresh)),
|
187
|
-
],
|
188
|
-
)
|
189
|
-
captured = capsys.readouterr().out.splitlines() + logger_stream.getvalue().split("\n")[:-1]
|
190
|
-
|
191
|
-
return {
|
192
|
-
"records": [msg for msg in (json.loads(line) for line in captured) if msg["type"] == "RECORD"],
|
193
|
-
"logs": [msg["log"] for msg in (json.loads(line) for line in captured) if msg["type"] == "LOG"],
|
194
|
-
"analytics": [
|
195
|
-
msg["trace"]["analytics"]
|
196
|
-
for msg in (json.loads(line) for line in captured)
|
197
|
-
if msg["type"] == "TRACE" and msg["trace"]["type"] == "ANALYTICS"
|
198
|
-
],
|
199
|
-
}
|
200
|
-
|
201
|
-
|
202
|
-
def read_with_state(
|
203
|
-
capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario[AbstractSource]
|
204
|
-
) -> Dict[str, List[Any]]:
|
205
|
-
launch(
|
169
|
+
def read(scenario: TestScenario[AbstractSource]) -> EntrypointOutput:
|
170
|
+
return entrypoint_read(
|
206
171
|
scenario.source,
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
172
|
+
scenario.config,
|
173
|
+
ConfiguredAirbyteCatalog.parse_obj(scenario.configured_catalog(SyncMode.full_refresh)),
|
174
|
+
)
|
175
|
+
|
176
|
+
|
177
|
+
def read_with_state(scenario: TestScenario[AbstractSource]) -> EntrypointOutput:
|
178
|
+
return entrypoint_read(
|
179
|
+
scenario.source,
|
180
|
+
scenario.config,
|
181
|
+
ConfiguredAirbyteCatalog.parse_obj(scenario.configured_catalog(SyncMode.incremental)),
|
182
|
+
scenario.input_state(),
|
216
183
|
)
|
217
|
-
captured = capsys.readouterr()
|
218
|
-
logs = caplog.records
|
219
|
-
return {
|
220
|
-
"records": [msg for msg in (json.loads(line) for line in captured.out.splitlines()) if msg["type"] in ("RECORD", "STATE")],
|
221
|
-
"logs": [msg["log"] for msg in (json.loads(line) for line in captured.out.splitlines()) if msg["type"] == "LOG"]
|
222
|
-
+ [{"level": log.levelname, "message": log.message} for log in logs],
|
223
|
-
}
|
224
184
|
|
225
185
|
|
226
186
|
def make_file(path: Path, file_contents: Optional[Union[Mapping[str, Any], List[Mapping[str, Any]]]]) -> str:
|
@@ -7,7 +7,6 @@ from pathlib import PosixPath
|
|
7
7
|
import pytest
|
8
8
|
from _pytest.capture import CaptureFixture
|
9
9
|
from freezegun import freeze_time
|
10
|
-
from pytest import LogCaptureFixture
|
11
10
|
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
|
12
11
|
from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
|
13
12
|
from unit_tests.sources.streams.concurrent.scenarios.incremental_scenarios import (
|
@@ -68,8 +67,8 @@ scenarios = [
|
|
68
67
|
|
69
68
|
@pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios])
|
70
69
|
@freeze_time("2023-06-09T00:00:00Z")
|
71
|
-
def test_concurrent_read(
|
72
|
-
verify_read(
|
70
|
+
def test_concurrent_read(scenario: TestScenario) -> None:
|
71
|
+
verify_read(scenario)
|
73
72
|
|
74
73
|
|
75
74
|
@pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios])
|
File without changes
|
File without changes
|
File without changes
|