airbyte-internal-ops 0.1.2.post2.dev20080805740__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/METADATA +8 -5
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/RECORD +31 -11
- airbyte_ops_mcp/_legacy/airbyte_ci/connector_pipelines/airbyte_ci/connectors/test/steps/common.py +1 -1
- airbyte_ops_mcp/cli/cloud.py +309 -38
- airbyte_ops_mcp/cloud_admin/connection_config.py +131 -0
- airbyte_ops_mcp/live_tests/__init__.py +16 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/__init__.py +35 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/audit_logging.py +88 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/consts.py +33 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/db_access.py +82 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/retrieval.py +391 -0
- airbyte_ops_mcp/live_tests/_connection_retriever/secrets_resolution.py +130 -0
- airbyte_ops_mcp/live_tests/config.py +190 -0
- airbyte_ops_mcp/live_tests/connection_fetcher.py +159 -2
- airbyte_ops_mcp/live_tests/connection_secret_retriever.py +173 -0
- airbyte_ops_mcp/live_tests/evaluation_modes.py +45 -0
- airbyte_ops_mcp/live_tests/http_metrics.py +81 -0
- airbyte_ops_mcp/live_tests/message_cache/__init__.py +15 -0
- airbyte_ops_mcp/live_tests/message_cache/duckdb_cache.py +415 -0
- airbyte_ops_mcp/live_tests/obfuscation.py +126 -0
- airbyte_ops_mcp/live_tests/regression/__init__.py +29 -0
- airbyte_ops_mcp/live_tests/regression/comparators.py +466 -0
- airbyte_ops_mcp/live_tests/schema_generation.py +154 -0
- airbyte_ops_mcp/live_tests/validation/__init__.py +43 -0
- airbyte_ops_mcp/live_tests/validation/catalog_validators.py +389 -0
- airbyte_ops_mcp/live_tests/validation/record_validators.py +227 -0
- airbyte_ops_mcp/mcp/_mcp_utils.py +3 -0
- airbyte_ops_mcp/mcp/live_tests.py +500 -0
- airbyte_ops_mcp/mcp/server.py +3 -0
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/WHEEL +0 -0
- {airbyte_internal_ops-0.1.2.post2.dev20080805740.dist-info → airbyte_internal_ops-0.1.4.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
|
2
|
+
"""Catalog validation functions for discovered catalogs.
|
|
3
|
+
|
|
4
|
+
Based on airbyte-ci validation tests:
|
|
5
|
+
https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/validation_tests/test_discover.py
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import dpath.util
|
|
14
|
+
import jsonschema
|
|
15
|
+
from airbyte_protocol.models import AirbyteCatalog, AirbyteStream
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ValidationResult:
    """Outcome of a single validation check.

    Attributes carry both the pass/fail verdict and human-readable detail:
    ``errors`` explain a failure, ``warnings`` record non-fatal findings.
    """

    passed: bool
    message: str
    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    @classmethod
    def success(cls, message: str = "Validation passed") -> ValidationResult:
        """Build a passing result with the given summary message."""
        return cls(passed=True, message=message)

    @classmethod
    def failure(cls, message: str, errors: list[str] | None = None) -> ValidationResult:
        """Build a failing result with the given summary and error details."""
        if errors is None:
            errors = []
        return cls(passed=False, message=message, errors=errors)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def validate_catalog_has_streams(catalog: AirbyteCatalog) -> ValidationResult:
    """Check that the discovered catalog declares at least one stream.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    if catalog.streams:
        return ValidationResult.success(
            f"Catalog contains {len(catalog.streams)} streams"
        )
    return ValidationResult.failure(
        "Catalog should contain at least one stream",
        errors=["No streams found in catalog"],
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def validate_no_duplicate_stream_names(catalog: AirbyteCatalog) -> ValidationResult:
    """Check that every stream name in the catalog is unique.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    # Tally occurrences per name, preserving first-appearance order.
    occurrences: dict[str, int] = {}
    for stream in catalog.streams:
        occurrences[stream.name] = occurrences.get(stream.name, 0) + 1

    repeated = [name for name, total in occurrences.items() if total > 1]
    if not repeated:
        return ValidationResult.success("All stream names are unique")
    return ValidationResult.failure(
        f"Catalog has duplicate stream names: {repeated}",
        errors=[f"Stream '{name}' appears multiple times" for name in repeated],
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def validate_schemas_are_valid_json_schema(catalog: AirbyteCatalog) -> ValidationResult:
    """Check that every stream schema is valid JSON Schema Draft 7.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    problems: list[str] = []
    for stream in catalog.streams:
        try:
            # check_schema validates the schema itself, not any instance.
            jsonschema.Draft7Validator.check_schema(stream.json_schema)
        except jsonschema.exceptions.SchemaError as exc:
            problems.append(
                f"Stream '{stream.name}' has invalid JSON schema: {exc.message}"
            )

    if not problems:
        return ValidationResult.success(
            "All stream schemas are valid JSON Schema Draft 7"
        )
    return ValidationResult.failure(
        "Some streams have invalid JSON schemas",
        errors=problems,
    )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def validate_cursors_exist_in_schema(catalog: AirbyteCatalog) -> ValidationResult:
    """Check that each declared default cursor field exists in its stream schema.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    problems: list[str] = []
    for stream in catalog.streams:
        cursor = stream.default_cursor_field
        if not cursor:
            continue

        schema = stream.json_schema
        if "properties" not in schema:
            problems.append(
                f"Stream '{stream.name}' has cursor field but no 'properties' in schema"
            )
            continue

        # Nested cursor components are joined with "/properties/" so the
        # dpath lookup descends through intermediate object schemas.
        lookup_path = "/properties/".join(cursor)
        if not dpath.util.search(schema["properties"], lookup_path):
            problems.append(
                f"Stream '{stream.name}': cursor field {cursor} "
                "not found in schema properties"
            )

    if problems:
        return ValidationResult.failure(
            "Some cursor fields are not defined in their schemas",
            errors=problems,
        )
    return ValidationResult.success("All cursor fields exist in their schemas")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _find_all_values_for_key(
|
|
140
|
+
schema: dict[str, Any] | list[Any] | Any,
|
|
141
|
+
key: str,
|
|
142
|
+
) -> list[Any]:
|
|
143
|
+
"""Find all values for a given key in a nested structure.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
schema: The schema or nested structure to search.
|
|
147
|
+
key: The key to search for.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
List of all values found for the key.
|
|
151
|
+
"""
|
|
152
|
+
results = []
|
|
153
|
+
if isinstance(schema, dict):
|
|
154
|
+
for k, v in schema.items():
|
|
155
|
+
if k == key:
|
|
156
|
+
results.append(v)
|
|
157
|
+
results.extend(_find_all_values_for_key(v, key))
|
|
158
|
+
elif isinstance(schema, list):
|
|
159
|
+
for item in schema:
|
|
160
|
+
results.extend(_find_all_values_for_key(item, key))
|
|
161
|
+
return results
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def validate_no_unresolved_refs(catalog: AirbyteCatalog) -> ValidationResult:
    """Check that no stream schema still contains unresolved ``$ref`` entries.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    problems = [
        f"Stream '{stream.name}' has unresolved $ref values: {refs}"
        for stream in catalog.streams
        if (refs := _find_all_values_for_key(stream.json_schema, "$ref"))
    ]

    if problems:
        return ValidationResult.failure(
            "Some streams have unresolved $ref values",
            errors=problems,
        )
    return ValidationResult.success("No unresolved $ref values found")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _find_keyword_in_schema(
|
|
188
|
+
schema: dict[str, Any] | list[Any] | str,
|
|
189
|
+
keyword: str,
|
|
190
|
+
) -> bool:
|
|
191
|
+
"""Find if a keyword exists in a schema, skipping object properties.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
schema: The schema to search.
|
|
195
|
+
keyword: The keyword to find.
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
True if keyword is found, False otherwise.
|
|
199
|
+
"""
|
|
200
|
+
|
|
201
|
+
def _find_keyword(
|
|
202
|
+
schema: dict[str, Any] | list[Any] | str,
|
|
203
|
+
key: str,
|
|
204
|
+
skip: bool = False,
|
|
205
|
+
) -> None:
|
|
206
|
+
if isinstance(schema, list):
|
|
207
|
+
for v in schema:
|
|
208
|
+
_find_keyword(v, key)
|
|
209
|
+
elif isinstance(schema, dict):
|
|
210
|
+
for k, v in schema.items():
|
|
211
|
+
if k == key and not skip:
|
|
212
|
+
raise StopIteration
|
|
213
|
+
rec_skip = k == "properties" and schema.get("type") == "object"
|
|
214
|
+
_find_keyword(v, key, rec_skip)
|
|
215
|
+
|
|
216
|
+
try:
|
|
217
|
+
_find_keyword(schema, keyword)
|
|
218
|
+
except StopIteration:
|
|
219
|
+
return True
|
|
220
|
+
return False
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def validate_no_disallowed_keywords(
    catalog: AirbyteCatalog,
    keywords: list[str] | None = None,
) -> ValidationResult:
    """Check that no stream schema uses any disallowed JSON-schema keyword.

    Args:
        catalog: The discovered catalog to validate.
        keywords: List of disallowed keywords. Defaults to ["allOf", "not"].

    Returns:
        ValidationResult indicating success or failure.
    """
    disallowed = ["allOf", "not"] if keywords is None else keywords

    problems = [
        f"Stream '{stream.name}' contains disallowed keyword '{word}'"
        for stream in catalog.streams
        for word in disallowed
        if _find_keyword_in_schema(stream.json_schema, word)
    ]

    if problems:
        return ValidationResult.failure(
            f"Some streams contain disallowed keywords: {disallowed}",
            errors=problems,
        )
    return ValidationResult.success("No disallowed keywords found in schemas")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def validate_primary_keys_exist_in_schema(catalog: AirbyteCatalog) -> ValidationResult:
    """Validate that all primary keys are present in their stream schemas.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    errors = []
    for stream in catalog.streams:
        primary_key = stream.source_defined_primary_key or []
        if not primary_key:
            continue

        schema = stream.json_schema
        if "properties" not in schema:
            # Report this once per stream; the previous implementation
            # appended the identical message once per key path.
            errors.append(
                f"Stream '{stream.name}' has primary key but no 'properties' in schema"
            )
            continue

        for pk in primary_key:
            # Nested key components are joined with "/properties/" so the
            # dpath lookup descends through intermediate object schemas.
            pk_path = "/properties/".join(pk)
            if not dpath.util.search(schema["properties"], pk_path):
                errors.append(
                    f"Stream '{stream.name}': primary key {pk} not found in schema"
                )

    if errors:
        return ValidationResult.failure(
            "Some primary keys are not defined in their schemas",
            errors=errors,
        )
    return ValidationResult.success("All primary keys exist in their schemas")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def validate_streams_have_sync_modes(catalog: AirbyteCatalog) -> ValidationResult:
    """Check that every stream declares a non-empty supported_sync_modes list.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    problems: list[str] = []
    for stream in catalog.streams:
        modes = stream.supported_sync_modes
        if modes is None:
            problems.append(
                f"Stream '{stream.name}' is missing supported_sync_modes field"
            )
        elif not modes:
            problems.append(f"Stream '{stream.name}' has empty supported_sync_modes list")

    if not problems:
        return ValidationResult.success("All streams have sync modes defined")
    return ValidationResult.failure(
        "Some streams are missing sync mode declarations",
        errors=problems,
    )
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def validate_additional_properties_is_true(catalog: AirbyteCatalog) -> ValidationResult:
    """Check that any additionalProperties value in a stream schema is true.

    Setting additionalProperties to false introduces risk of breaking changes
    when removing properties from the schema.

    Args:
        catalog: The discovered catalog to validate.

    Returns:
        ValidationResult indicating success or failure.
    """
    problems = [
        f"Stream '{stream.name}' has additionalProperties={value}, "
        "should be true for backward compatibility"
        for stream in catalog.streams
        for value in _find_all_values_for_key(stream.json_schema, "additionalProperties")
        if value is not True
    ]

    if problems:
        return ValidationResult.failure(
            "Some streams have additionalProperties set to false",
            errors=problems,
        )
    return ValidationResult.success(
        "All additionalProperties values are true (or not set)"
    )
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def validate_stream(stream: AirbyteStream) -> list[ValidationResult]:
    """Run every stream-level validation against a single stream.

    Args:
        stream: The stream to validate.

    Returns:
        List of ValidationResult objects.
    """
    # Wrap the stream in a one-element catalog so the catalog-level
    # validators can be reused unchanged.
    single_stream_catalog = AirbyteCatalog(streams=[stream])
    checks = (
        validate_schemas_are_valid_json_schema,
        validate_cursors_exist_in_schema,
        validate_no_unresolved_refs,
        validate_no_disallowed_keywords,
        validate_primary_keys_exist_in_schema,
        validate_streams_have_sync_modes,
        validate_additional_properties_is_true,
    )
    return [check(single_stream_catalog) for check in checks]
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def validate_catalog(catalog: AirbyteCatalog) -> list[ValidationResult]:
    """Run all catalog validations.

    Args:
        catalog: The catalog to validate.

    Returns:
        List of ValidationResult objects, one per validation check.
    """
    checks = (
        validate_catalog_has_streams,
        validate_no_duplicate_stream_names,
        validate_schemas_are_valid_json_schema,
        validate_cursors_exist_in_schema,
        validate_no_unresolved_refs,
        validate_no_disallowed_keywords,
        validate_primary_keys_exist_in_schema,
        validate_streams_have_sync_modes,
        validate_additional_properties_is_true,
    )
    return [check(catalog) for check in checks]
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
|
2
|
+
"""Record validation functions for connector read output.
|
|
3
|
+
|
|
4
|
+
Based on airbyte-ci validation tests:
|
|
5
|
+
https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/validation_tests/test_read.py
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
from functools import reduce
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
import jsonschema
|
|
15
|
+
from airbyte_protocol.models import AirbyteMessage, AirbyteStateType
|
|
16
|
+
from airbyte_protocol.models import Type as AirbyteMessageType
|
|
17
|
+
|
|
18
|
+
from airbyte_ops_mcp.live_tests.validation.catalog_validators import ValidationResult
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from airbyte_ops_mcp.live_tests.models import ExecutionResult
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def validate_records_conform_to_schema(
    execution_result: ExecutionResult,
) -> ValidationResult:
    """Check every emitted record against its stream's declared JSON schema.

    Args:
        execution_result: The execution result containing records and catalog.

    Returns:
        ValidationResult indicating success or failure.
    """
    if not execution_result.configured_catalog:
        return ValidationResult.failure(
            "No configured catalog available for schema validation"
        )

    schema_by_stream = {
        configured.stream.name: configured.stream.json_schema
        for configured in execution_result.configured_catalog.streams
    }

    problems = []
    for record in execution_result.get_records():
        stream_name = record.record.stream
        if stream_name not in schema_by_stream:
            problems.append(f"Record for unknown stream '{stream_name}'")
            continue

        try:
            jsonschema.validate(
                instance=record.record.data,
                schema=schema_by_stream[stream_name],
            )
        except jsonschema.exceptions.ValidationError as exc:
            problems.append(
                f"Record in stream '{stream_name}' does not conform to schema: "
                f"{exc.message}"
            )

    if problems:
        return ValidationResult.failure(
            "Some records do not conform to their schemas",
            errors=problems[:10],  # Limit to first 10 errors
        )
    return ValidationResult.success("All records conform to their schemas")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _extract_primary_key_value(
|
|
70
|
+
record: dict[str, Any],
|
|
71
|
+
primary_key: list[list[str]],
|
|
72
|
+
) -> dict[tuple[str, ...], Any]:
|
|
73
|
+
"""Extract primary key values from a record.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
record: The record data.
|
|
77
|
+
primary_key: List of primary key paths.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Dictionary mapping primary key paths to their values.
|
|
81
|
+
"""
|
|
82
|
+
pk_values = {}
|
|
83
|
+
for pk_path in primary_key:
|
|
84
|
+
pk_value: Any = reduce(
|
|
85
|
+
lambda data, key: data.get(key) if isinstance(data, dict) else None,
|
|
86
|
+
pk_path,
|
|
87
|
+
record,
|
|
88
|
+
)
|
|
89
|
+
pk_values[tuple(pk_path)] = pk_value
|
|
90
|
+
return pk_values
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def validate_primary_keys_in_records(
    execution_result: ExecutionResult,
) -> ValidationResult:
    """Check that no record has a null value for any primary-key component.

    Args:
        execution_result: The execution result containing records and catalog.

    Returns:
        ValidationResult indicating success or failure.
    """
    if not execution_result.configured_catalog:
        return ValidationResult.failure(
            "No configured catalog available for primary key validation"
        )

    # Only streams that actually declare a primary key are checked.
    pk_by_stream: dict[str, list[list[str]]] = {
        configured.stream.name: configured.primary_key
        for configured in execution_result.configured_catalog.streams
        if configured.primary_key
    }

    problems = []
    for record in execution_result.get_records():
        stream_name = record.record.stream
        primary_key = pk_by_stream.get(stream_name)
        if primary_key is None:
            continue

        extracted = _extract_primary_key_value(record.record.data, primary_key)
        for key_path, key_value in extracted.items():
            if key_value is None:
                problems.append(
                    f"Stream '{stream_name}': primary key {key_path} has null value"
                )

    if problems:
        return ValidationResult.failure(
            "Some records have null primary key values",
            errors=problems[:10],  # Limit to first 10 errors
        )
    return ValidationResult.success("All records have valid primary key values")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def validate_state_messages_emitted(
    execution_result: ExecutionResult,
) -> ValidationResult:
    """Validate that state messages are emitted for each stream.

    A configured stream with no per-stream state messages produces an error;
    state messages using the deprecated LEGACY type produce warnings only and
    do not fail the validation.

    Args:
        execution_result: The execution result containing messages.

    Returns:
        ValidationResult indicating success or failure.
    """
    if not execution_result.configured_catalog:
        return ValidationResult.failure(
            "No configured catalog available for state validation"
        )

    errors = []
    warnings = []

    configured_streams = {
        stream.stream.name for stream in execution_result.configured_catalog.streams
    }

    # Group STATE messages by stream name. Messages without a per-stream
    # descriptor are not attributed to any stream and are skipped here.
    state_messages_per_stream: dict[str, list[AirbyteMessage]] = defaultdict(list)
    for message in execution_result.airbyte_messages:
        if (
            message.type == AirbyteMessageType.STATE
            and message.state.stream
            and message.state.stream.stream_descriptor
        ):
            stream_name = message.state.stream.stream_descriptor.name
            state_messages_per_stream[stream_name].append(message)

    for stream_name in configured_streams:
        if stream_name not in state_messages_per_stream:
            errors.append(f"No state messages emitted for stream '{stream_name}'")
            continue

        # Stream has state messages; flag (but do not fail on) LEGACY type.
        state_messages = state_messages_per_stream[stream_name]
        for state_msg in state_messages:
            if state_msg.state.type == AirbyteStateType.LEGACY:
                warnings.append(
                    f"Stream '{stream_name}' uses deprecated LEGACY state type"
                )

    # Only missing state messages (errors) decide pass/fail; warnings are
    # carried through for reporting.
    result = ValidationResult(
        passed=len(errors) == 0,
        message=(
            "State messages validation completed"
            if len(errors) == 0
            else "Some streams are missing state messages"
        ),
        errors=errors,
        warnings=warnings,
    )
    return result
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def validate_has_records(execution_result: ExecutionResult) -> ValidationResult:
    """Check that the read produced at least one record.

    Args:
        execution_result: The execution result containing messages.

    Returns:
        ValidationResult indicating success or failure.
    """
    total = sum(1 for _ in execution_result.get_records())
    if total:
        return ValidationResult.success(f"Read {total} records")
    return ValidationResult.failure(
        "No records were read",
        errors=["At least one record should be read using the provided catalog"],
    )
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def validate_read_output(execution_result: ExecutionResult) -> list[ValidationResult]:
    """Run all read output validations.

    Args:
        execution_result: The execution result to validate.

    Returns:
        List of ValidationResult objects, one per validation check.
    """
    checks = (
        validate_has_records,
        validate_records_conform_to_schema,
        validate_primary_keys_in_records,
        validate_state_messages_emitted,
    )
    return [check(execution_result) for check in checks]
|
|
@@ -73,6 +73,9 @@ class ToolDomain(str, Enum):
|
|
|
73
73
|
PROMPTS = "prompts"
|
|
74
74
|
"""Prompt templates for common workflows"""
|
|
75
75
|
|
|
76
|
+
LIVE_TESTS = "live_tests"
|
|
77
|
+
"""Live tests for connector validation and regression testing"""
|
|
78
|
+
|
|
76
79
|
|
|
77
80
|
_REGISTERED_TOOLS: list[tuple[Callable[..., Any], dict[str, Any]]] = []
|
|
78
81
|
_REGISTERED_RESOURCES: list[tuple[Callable[..., Any], dict[str, Any]]] = []
|