airbyte-cdk 6.54.11__py3-none-any.whl → 6.55.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. airbyte_cdk/cli/airbyte_cdk/_connector.py +32 -8
  2. airbyte_cdk/cli/airbyte_cdk/_image.py +76 -0
  3. airbyte_cdk/cli/airbyte_cdk/_secrets.py +13 -12
  4. airbyte_cdk/models/airbyte_protocol_serializers.py +4 -0
  5. airbyte_cdk/models/connector_metadata.py +14 -0
  6. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +1 -1
  7. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +31 -0
  8. airbyte_cdk/sources/declarative/manifest_declarative_source.py +28 -9
  9. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +23 -2
  10. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +2 -2
  11. airbyte_cdk/test/entrypoint_wrapper.py +163 -26
  12. airbyte_cdk/test/models/scenario.py +49 -10
  13. airbyte_cdk/test/standard_tests/__init__.py +2 -4
  14. airbyte_cdk/test/standard_tests/connector_base.py +12 -80
  15. airbyte_cdk/test/standard_tests/docker_base.py +388 -0
  16. airbyte_cdk/test/standard_tests/pytest_hooks.py +115 -2
  17. airbyte_cdk/test/standard_tests/source_base.py +13 -7
  18. airbyte_cdk/test/standard_tests/util.py +4 -3
  19. airbyte_cdk/utils/connector_paths.py +3 -3
  20. airbyte_cdk/utils/docker.py +83 -34
  21. {airbyte_cdk-6.54.11.dist-info → airbyte_cdk-6.55.1.dist-info}/METADATA +2 -1
  22. {airbyte_cdk-6.54.11.dist-info → airbyte_cdk-6.55.1.dist-info}/RECORD +26 -25
  23. {airbyte_cdk-6.54.11.dist-info → airbyte_cdk-6.55.1.dist-info}/LICENSE.txt +0 -0
  24. {airbyte_cdk-6.54.11.dist-info → airbyte_cdk-6.55.1.dist-info}/LICENSE_SHORT +0 -0
  25. {airbyte_cdk-6.54.11.dist-info → airbyte_cdk-6.55.1.dist-info}/WHEEL +0 -0
  26. {airbyte_cdk-6.54.11.dist-info → airbyte_cdk-6.55.1.dist-info}/entry_points.txt +0 -0
@@ -19,9 +19,11 @@ import logging
19
19
  import re
20
20
  import tempfile
21
21
  import traceback
22
+ from collections import deque
23
+ from collections.abc import Generator, Mapping
22
24
  from io import StringIO
23
25
  from pathlib import Path
24
- from typing import Any, List, Mapping, Optional, Union
26
+ from typing import Any, List, Literal, Optional, Union, final, overload
25
27
 
26
28
  import orjson
27
29
  from pydantic import ValidationError as V2ValidationError
@@ -36,6 +38,7 @@ from airbyte_cdk.models import (
36
38
  AirbyteMessageSerializer,
37
39
  AirbyteStateMessage,
38
40
  AirbyteStateMessageSerializer,
41
+ AirbyteStreamState,
39
42
  AirbyteStreamStatus,
40
43
  ConfiguredAirbyteCatalog,
41
44
  ConfiguredAirbyteCatalogSerializer,
@@ -48,13 +51,41 @@ from airbyte_cdk.test.models.scenario import ExpectedOutcome
48
51
 
49
52
 
50
53
  class EntrypointOutput:
51
- def __init__(self, messages: List[str], uncaught_exception: Optional[BaseException] = None):
52
- try:
53
- self._messages = [self._parse_message(message) for message in messages]
54
- except V2ValidationError as exception:
55
- raise ValueError("All messages are expected to be AirbyteMessage") from exception
54
+ """A class to encapsulate the output of an Airbyte connector's execution.
55
+
56
+ This class can be initialized with a list of messages or a file containing messages.
57
+ It provides methods to access different types of messages produced during the execution
58
+ of an Airbyte connector, including both successful messages and error messages.
59
+
60
+ When working with records and state messages, it provides both a list and an iterator
61
+ implementation. Lists are easier to work with, but generators are better suited to handle
62
+ large volumes of messages without overflowing the available memory.
63
+ """
64
+
65
+ def __init__(
66
+ self,
67
+ messages: list[str] | None = None,
68
+ uncaught_exception: Optional[BaseException] = None,
69
+ *,
70
+ message_file: Path | None = None,
71
+ ) -> None:
72
+ if messages is None and message_file is None:
73
+ raise ValueError("Either messages or message_file must be provided")
74
+ if messages is not None and message_file is not None:
75
+ raise ValueError("Only one of messages or message_file can be provided")
76
+
77
+ self._messages: list[AirbyteMessage] | None = None
78
+ self._message_file: Path | None = message_file
79
+ if messages:
80
+ try:
81
+ self._messages = [self._parse_message(message) for message in messages]
82
+ except V2ValidationError as exception:
83
+ raise ValueError("All messages are expected to be AirbyteMessage") from exception
56
84
 
57
85
  if uncaught_exception:
86
+ if self._messages is None:
87
+ self._messages = []
88
+
58
89
  self._messages.append(
59
90
  assemble_uncaught_exception(
60
91
  type(uncaught_exception), uncaught_exception
@@ -72,39 +103,76 @@ class EntrypointOutput:
72
103
  )
73
104
 
74
105
  @property
75
- def records_and_state_messages(self) -> List[AirbyteMessage]:
76
- return self._get_message_by_types([Type.RECORD, Type.STATE])
106
+ def records_and_state_messages(
107
+ self,
108
+ ) -> list[AirbyteMessage]:
109
+ return self.get_message_by_types(
110
+ message_types=[Type.RECORD, Type.STATE],
111
+ safe_iterator=False,
112
+ )
113
+
114
+ def records_and_state_messages_iterator(
115
+ self,
116
+ ) -> Generator[AirbyteMessage, None, None]:
117
+ """Returns a generator that yields record and state messages one by one.
118
+
119
+ Use this instead of `records_and_state_messages` when the volume of messages could be large
120
+ enough to overload available memory.
121
+ """
122
+ return self.get_message_by_types(
123
+ message_types=[Type.RECORD, Type.STATE],
124
+ safe_iterator=True,
125
+ )
77
126
 
78
127
  @property
79
128
  def records(self) -> List[AirbyteMessage]:
80
- return self._get_message_by_types([Type.RECORD])
129
+ return self.get_message_by_types([Type.RECORD])
130
+
131
+ @property
132
+ def records_iterator(self) -> Generator[AirbyteMessage, None, None]:
133
+ """Returns a generator that yields record messages one by one.
134
+
135
+ Use this instead of `records` when the volume of records could be large
136
+ enough to overload available memory.
137
+ """
138
+ return self.get_message_by_types([Type.RECORD], safe_iterator=True)
81
139
 
82
140
  @property
83
141
  def state_messages(self) -> List[AirbyteMessage]:
84
- return self._get_message_by_types([Type.STATE])
142
+ return self.get_message_by_types([Type.STATE])
85
143
 
86
144
  @property
87
145
  def spec_messages(self) -> List[AirbyteMessage]:
88
- return self._get_message_by_types([Type.SPEC])
146
+ return self.get_message_by_types([Type.SPEC])
89
147
 
90
148
  @property
91
149
  def connection_status_messages(self) -> List[AirbyteMessage]:
92
- return self._get_message_by_types([Type.CONNECTION_STATUS])
150
+ return self.get_message_by_types([Type.CONNECTION_STATUS])
93
151
 
94
152
  @property
95
- def most_recent_state(self) -> Any:
96
- state_messages = self._get_message_by_types([Type.STATE])
97
- if not state_messages:
98
- raise ValueError("Can't provide most recent state as there are no state messages")
99
- return state_messages[-1].state.stream # type: ignore[union-attr] # state has `stream`
153
+ def most_recent_state(self) -> AirbyteStreamState | None:
154
+ state_message_iterator = self.get_message_by_types(
155
+ [Type.STATE],
156
+ safe_iterator=True,
157
+ )
158
+ # Use a deque with maxlen=1 to efficiently get the last state message
159
+ double_ended_queue = deque(state_message_iterator, maxlen=1)
160
+ try:
161
+ final_state_message: AirbyteMessage = double_ended_queue.pop()
162
+ except IndexError:
163
+ raise ValueError(
164
+ "Can't provide most recent state as there are no state messages."
165
+ ) from None
166
+
167
+ return final_state_message.state.stream # type: ignore[union-attr] # state has `stream`
100
168
 
101
169
  @property
102
170
  def logs(self) -> List[AirbyteMessage]:
103
- return self._get_message_by_types([Type.LOG])
171
+ return self.get_message_by_types([Type.LOG])
104
172
 
105
173
  @property
106
174
  def trace_messages(self) -> List[AirbyteMessage]:
107
- return self._get_message_by_types([Type.TRACE])
175
+ return self.get_message_by_types([Type.TRACE])
108
176
 
109
177
  @property
110
178
  def analytics_messages(self) -> List[AirbyteMessage]:
@@ -116,7 +184,7 @@ class EntrypointOutput:
116
184
 
117
185
  @property
118
186
  def catalog(self) -> AirbyteMessage:
119
- catalog = self._get_message_by_types([Type.CATALOG])
187
+ catalog = self.get_message_by_types([Type.CATALOG])
120
188
  if len(catalog) != 1:
121
189
  raise ValueError(f"Expected exactly one catalog but got {len(catalog)}")
122
190
  return catalog[0]
@@ -131,13 +199,80 @@ class EntrypointOutput:
131
199
  )
132
200
  return list(status_messages)
133
201
 
134
- def _get_message_by_types(self, message_types: List[Type]) -> List[AirbyteMessage]:
135
- return [message for message in self._messages if message.type in message_types]
202
+ def get_message_iterator(self) -> Generator[AirbyteMessage, None, None]:
203
+ """Creates a generator which yields messages one by one.
204
+
205
+ This will iterate over all messages in the output file (if provided) or the messages
206
+ provided during initialization. File results are provided first, followed by any
207
+ messages that were passed in directly.
208
+ """
209
+ if self._message_file:
210
+ try:
211
+ with open(self._message_file, "r", encoding="utf-8") as file:
212
+ for line in file:
213
+ if not line.strip():
214
+ # Skip empty lines
215
+ continue
216
+
217
+ yield self._parse_message(line.strip())
218
+ except FileNotFoundError:
219
+ raise ValueError(f"Message file {self._message_file} not found")
220
+
221
+ if self._messages is not None:
222
+ yield from self._messages
223
+
224
+ # Overloads to provide proper type hints for different usages of `get_message_by_types`.
225
+
226
+ @overload
227
+ def get_message_by_types(
228
+ self,
229
+ message_types: list[Type],
230
+ ) -> list[AirbyteMessage]: ...
231
+
232
+ @overload
233
+ def get_message_by_types(
234
+ self,
235
+ message_types: list[Type],
236
+ *,
237
+ safe_iterator: Literal[False],
238
+ ) -> list[AirbyteMessage]: ...
239
+
240
+ @overload
241
+ def get_message_by_types(
242
+ self,
243
+ message_types: list[Type],
244
+ *,
245
+ safe_iterator: Literal[True],
246
+ ) -> Generator[AirbyteMessage, None, None]: ...
247
+
248
+ def get_message_by_types(
249
+ self,
250
+ message_types: list[Type],
251
+ *,
252
+ safe_iterator: bool = False,
253
+ ) -> list[AirbyteMessage] | Generator[AirbyteMessage, None, None]:
254
+ """Get messages of specific types.
255
+
256
+ If `safe_iterator` is True, returns a generator that yields messages one by one.
257
+ If `safe_iterator` is False, returns a list of messages.
258
+
259
+ Use `safe_iterator=True` when the volume of messages could overload the available
260
+ memory.
261
+ """
262
+ message_generator = self.get_message_iterator()
263
+
264
+ if safe_iterator:
265
+ return (message for message in message_generator if message.type in message_types)
266
+
267
+ return [message for message in message_generator if message.type in message_types]
136
268
 
137
269
  def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> List[AirbyteMessage]:
138
270
  return [
139
271
  message
140
- for message in self._get_message_by_types([Type.TRACE])
272
+ for message in self.get_message_by_types(
273
+ [Type.TRACE],
274
+ safe_iterator=True,
275
+ )
141
276
  if message.trace.type == trace_type # type: ignore[union-attr] # trace has `type`
142
277
  ]
143
278
 
@@ -184,7 +319,7 @@ def _run_command(
184
319
  parsed_args = AirbyteEntrypoint.parse_args(args)
185
320
 
186
321
  source_entrypoint = AirbyteEntrypoint(source)
187
- messages = []
322
+ messages: list[str] = []
188
323
  uncaught_exception = None
189
324
  try:
190
325
  for message in source_entrypoint.run(parsed_args):
@@ -199,8 +334,10 @@ def _run_command(
199
334
  captured_logs = log_capture_buffer.getvalue().split("\n")[:-1]
200
335
 
201
336
  parent_logger.removeHandler(stream_handler)
202
-
203
- return EntrypointOutput(messages + captured_logs, uncaught_exception=uncaught_exception)
337
+ return EntrypointOutput(
338
+ messages=messages + captured_logs,
339
+ uncaught_exception=uncaught_exception,
340
+ )
204
341
 
205
342
 
206
343
  def discover(
@@ -9,14 +9,20 @@ up iteration cycles.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
+ import json
13
+ import tempfile
14
+ from contextlib import contextmanager, suppress
12
15
  from pathlib import Path # noqa: TC003 # Pydantic needs this (don't move to 'if typing' block)
13
- from typing import Any, Literal, cast
16
+ from typing import TYPE_CHECKING, Any, Literal, cast
14
17
 
15
18
  import yaml
16
19
  from pydantic import BaseModel, ConfigDict
17
20
 
18
21
  from airbyte_cdk.test.models.outcome import ExpectedOutcome
19
22
 
23
+ if TYPE_CHECKING:
24
+ from collections.abc import Generator
25
+
20
26
 
21
27
  class ConnectorTestScenario(BaseModel):
22
28
  """Acceptance test scenario, as a Pydantic model.
@@ -41,13 +47,13 @@ class ConnectorTestScenario(BaseModel):
41
47
  config_path: Path | None = None
42
48
  config_dict: dict[str, Any] | None = None
43
49
 
44
- id: str | None = None
50
+ _id: str | None = None # Used to override the default ID generation
45
51
 
46
52
  configured_catalog_path: Path | None = None
47
53
  timeout_seconds: int | None = None
48
54
  expect_records: AcceptanceTestExpectRecords | None = None
49
55
  file_types: AcceptanceTestFileTypes | None = None
50
- status: Literal["succeed", "failed"] | None = None
56
+ status: Literal["succeed", "failed", "exception"] | None = None
51
57
 
52
58
  def get_config_dict(
53
59
  self,
@@ -93,16 +99,49 @@ class ConnectorTestScenario(BaseModel):
93
99
  return ExpectedOutcome.from_status_str(self.status)
94
100
 
95
101
  @property
96
- def instance_name(self) -> str:
97
- return self.config_path.stem if self.config_path else "Unnamed Scenario"
102
+ def id(self) -> str:
103
+ """Return a unique identifier for the test scenario.
104
+
105
+ This is used by PyTest to identify the test scenario.
106
+ """
107
+ if self._id:
108
+ return self._id
98
109
 
99
- def __str__(self) -> str:
100
- if self.id:
101
- return f"'{self.id}' Test Scenario"
102
110
  if self.config_path:
103
- return f"'{self.config_path.name}' Test Scenario"
111
+ return self.config_path.stem
112
+
113
+ return str(hash(self))
104
114
 
105
- return f"'{hash(self)}' Test Scenario"
115
+ def __str__(self) -> str:
116
+ return f"'{self.id}' Test Scenario"
117
+
118
+ @contextmanager
119
+ def with_temp_config_file(
120
+ self,
121
+ connector_root: Path,
122
+ ) -> Generator[Path, None, None]:
123
+ """Yield a temporary JSON file path containing the config dict and delete it on exit."""
124
+ config = self.get_config_dict(
125
+ empty_if_missing=True,
126
+ connector_root=connector_root,
127
+ )
128
+ with tempfile.NamedTemporaryFile(
129
+ prefix="config-",
130
+ suffix=".json",
131
+ mode="w",
132
+ delete=False, # Don't fail if cannot delete the file on exit
133
+ encoding="utf-8",
134
+ ) as temp_file:
135
+ temp_file.write(json.dumps(config))
136
+ temp_file.flush()
137
+ # Allow the file to be read by other processes
138
+ temp_path = Path(temp_file.name)
139
+ temp_path.chmod(temp_path.stat().st_mode | 0o444)
140
+ yield temp_path
141
+
142
+ # attempt cleanup, ignore errors
143
+ with suppress(OSError):
144
+ temp_path.unlink()
106
145
 
107
146
  def without_expected_outcome(self) -> ConnectorTestScenario:
108
147
  """Return a copy of the scenario that does not expect failure or success.
@@ -27,10 +27,8 @@ Available test suites base classes:
27
27
 
28
28
  '''
29
29
 
30
- from airbyte_cdk.test.standard_tests.connector_base import (
31
- ConnectorTestScenario,
32
- ConnectorTestSuiteBase,
33
- )
30
+ from airbyte_cdk.test.models.scenario import ConnectorTestScenario
31
+ from airbyte_cdk.test.standard_tests.connector_base import ConnectorTestSuiteBase
34
32
  from airbyte_cdk.test.standard_tests.declarative_sources import (
35
33
  DeclarativeSourceTestSuite,
36
34
  )
@@ -3,16 +3,11 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
- import abc
7
6
  import importlib
8
- import inspect
9
7
  import os
10
- import sys
11
- from collections.abc import Callable
12
8
  from pathlib import Path
13
- from typing import cast
9
+ from typing import TYPE_CHECKING, cast
14
10
 
15
- import yaml
16
11
  from boltons.typeutils import classproperty
17
12
 
18
13
  from airbyte_cdk.models import (
@@ -24,14 +19,20 @@ from airbyte_cdk.test.models import (
24
19
  ConnectorTestScenario,
25
20
  )
26
21
  from airbyte_cdk.test.standard_tests._job_runner import IConnector, run_test_job
22
+ from airbyte_cdk.test.standard_tests.docker_base import DockerConnectorTestSuite
27
23
  from airbyte_cdk.utils.connector_paths import (
28
24
  ACCEPTANCE_TEST_CONFIG,
29
25
  find_connector_root,
30
26
  )
31
27
 
28
+ if TYPE_CHECKING:
29
+ from collections.abc import Callable
32
30
 
33
- class ConnectorTestSuiteBase(abc.ABC):
34
- """Base class for connector test suites."""
31
+ from airbyte_cdk.test import entrypoint_wrapper
32
+
33
+
34
+ class ConnectorTestSuiteBase(DockerConnectorTestSuite):
35
+ """Base class for Python connector test suites."""
35
36
 
36
37
  connector: type[IConnector] | Callable[[], IConnector] | None # type: ignore [reportRedeclaration]
37
38
  """The connector class or a factory function that returns an scenario of IConnector."""
@@ -79,13 +80,6 @@ class ConnectorTestSuiteBase(abc.ABC):
79
80
  ) from e
80
81
  return cast(type[IConnector], getattr(module, matching_class_name))
81
82
 
82
- @classmethod
83
- def get_test_class_dir(cls) -> Path:
84
- """Get the file path that contains the class."""
85
- module = sys.modules[cls.__module__]
86
- # Get the directory containing the test file
87
- return Path(inspect.getfile(module)).parent
88
-
89
83
  @classmethod
90
84
  def create_connector(
91
85
  cls,
@@ -118,69 +112,7 @@ class ConnectorTestSuiteBase(abc.ABC):
118
112
  test_scenario=scenario,
119
113
  connector_root=self.get_connector_root_dir(),
120
114
  )
121
- conn_status_messages: list[AirbyteMessage] = [
122
- msg for msg in result._messages if msg.type == Type.CONNECTION_STATUS
123
- ] # noqa: SLF001 # Non-public API
124
- assert len(conn_status_messages) == 1, (
125
- f"Expected exactly one CONNECTION_STATUS message. Got: {result._messages}"
115
+ assert len(result.connection_status_messages) == 1, (
116
+ f"Expected exactly one CONNECTION_STATUS message. "
117
+ "Got: {result.connection_status_messages!s}"
126
118
  )
127
-
128
- @classmethod
129
- def get_connector_root_dir(cls) -> Path:
130
- """Get the root directory of the connector."""
131
- return find_connector_root([cls.get_test_class_dir(), Path.cwd()])
132
-
133
- @classproperty
134
- def acceptance_test_config_path(cls) -> Path:
135
- """Get the path to the acceptance test config file."""
136
- result = cls.get_connector_root_dir() / ACCEPTANCE_TEST_CONFIG
137
- if result.exists():
138
- return result
139
-
140
- raise FileNotFoundError(f"Acceptance test config file not found at: {str(result)}")
141
-
142
- @classmethod
143
- def get_scenarios(
144
- cls,
145
- ) -> list[ConnectorTestScenario]:
146
- """Get acceptance tests for a given category.
147
-
148
- This has to be a separate function because pytest does not allow
149
- parametrization of fixtures with arguments from the test class itself.
150
- """
151
- categories = ["connection", "spec"]
152
- all_tests_config = yaml.safe_load(cls.acceptance_test_config_path.read_text())
153
- if "acceptance_tests" not in all_tests_config:
154
- raise ValueError(
155
- f"Acceptance tests config not found in {cls.acceptance_test_config_path}."
156
- f" Found only: {str(all_tests_config)}."
157
- )
158
-
159
- test_scenarios: list[ConnectorTestScenario] = []
160
- for category in categories:
161
- if (
162
- category not in all_tests_config["acceptance_tests"]
163
- or "tests" not in all_tests_config["acceptance_tests"][category]
164
- ):
165
- continue
166
-
167
- for test in all_tests_config["acceptance_tests"][category]["tests"]:
168
- if "config_path" not in test:
169
- # Skip tests without a config_path
170
- continue
171
-
172
- if "iam_role" in test["config_path"]:
173
- # We skip iam_role tests for now, as they are not supported in the test suite.
174
- continue
175
-
176
- scenario = ConnectorTestScenario.model_validate(test)
177
-
178
- if scenario.config_path and scenario.config_path in [
179
- s.config_path for s in test_scenarios
180
- ]:
181
- # Skip duplicate scenarios based on config_path
182
- continue
183
-
184
- test_scenarios.append(scenario)
185
-
186
- return test_scenarios