airbyte-internal-ops 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.5.dist-info}/METADATA +8 -5
  2. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.5.dist-info}/RECORD +35 -15
  3. airbyte_ops_mcp/_legacy/airbyte_ci/connector_pipelines/airbyte_ci/connectors/test/steps/common.py +1 -1
  4. airbyte_ops_mcp/airbyte_repo/list_connectors.py +44 -4
  5. airbyte_ops_mcp/airbyte_repo/utils.py +5 -3
  6. airbyte_ops_mcp/cli/cloud.py +317 -47
  7. airbyte_ops_mcp/cli/repo.py +15 -0
  8. airbyte_ops_mcp/cloud_admin/connection_config.py +131 -0
  9. airbyte_ops_mcp/live_tests/__init__.py +16 -0
  10. airbyte_ops_mcp/live_tests/_connection_retriever/__init__.py +35 -0
  11. airbyte_ops_mcp/live_tests/_connection_retriever/audit_logging.py +88 -0
  12. airbyte_ops_mcp/live_tests/_connection_retriever/consts.py +33 -0
  13. airbyte_ops_mcp/live_tests/_connection_retriever/db_access.py +82 -0
  14. airbyte_ops_mcp/live_tests/_connection_retriever/retrieval.py +391 -0
  15. airbyte_ops_mcp/live_tests/_connection_retriever/secrets_resolution.py +130 -0
  16. airbyte_ops_mcp/live_tests/config.py +190 -0
  17. airbyte_ops_mcp/live_tests/connection_fetcher.py +159 -2
  18. airbyte_ops_mcp/live_tests/connection_secret_retriever.py +173 -0
  19. airbyte_ops_mcp/live_tests/evaluation_modes.py +45 -0
  20. airbyte_ops_mcp/live_tests/http_metrics.py +81 -0
  21. airbyte_ops_mcp/live_tests/message_cache/__init__.py +15 -0
  22. airbyte_ops_mcp/live_tests/message_cache/duckdb_cache.py +415 -0
  23. airbyte_ops_mcp/live_tests/obfuscation.py +126 -0
  24. airbyte_ops_mcp/live_tests/regression/__init__.py +29 -0
  25. airbyte_ops_mcp/live_tests/regression/comparators.py +466 -0
  26. airbyte_ops_mcp/live_tests/schema_generation.py +154 -0
  27. airbyte_ops_mcp/live_tests/validation/__init__.py +43 -0
  28. airbyte_ops_mcp/live_tests/validation/catalog_validators.py +389 -0
  29. airbyte_ops_mcp/live_tests/validation/record_validators.py +227 -0
  30. airbyte_ops_mcp/mcp/_mcp_utils.py +3 -0
  31. airbyte_ops_mcp/mcp/live_tests.py +515 -0
  32. airbyte_ops_mcp/mcp/server.py +3 -0
  33. airbyte_ops_mcp/mcp/server_info.py +2 -2
  34. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.5.dist-info}/WHEEL +0 -0
  35. {airbyte_internal_ops-0.1.3.dist-info → airbyte_internal_ops-0.1.5.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,466 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Comparison functions for regression testing.
3
+
4
+ This module provides functions for comparing control and target connector
5
+ outputs to detect regressions in data integrity.
6
+
7
+ Based on airbyte-ci implementation:
8
+ https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/regression_tests/test_read.py
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ from dataclasses import dataclass, field
16
+ from typing import Any
17
+
18
+ from airbyte_protocol.models import AirbyteMessage
19
+ from deepdiff import DeepDiff
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
# Record fields left out of record comparisons by default. `emitted_at` is a
# timestamp, so it varies between runs even when the data is identical.
EXCLUDE_PATHS = ["emitted_at"]
25
+
26
+
27
@dataclass
class RecordDiff:
    """Record-level differences found for one stream.

    All three lists start out empty; ``has_diff`` reports whether any of them
    ended up with content.
    """

    stream_name: str
    # Entries describing records whose content differs between the two runs.
    records_with_value_diff: list[dict[str, Any]] = field(default_factory=list)
    # Records seen in the control run only.
    records_only_in_control: list[dict[str, Any]] = field(default_factory=list)
    # Records seen in the target run only.
    records_only_in_target: list[dict[str, Any]] = field(default_factory=list)

    @property
    def has_diff(self) -> bool:
        """Return True when at least one of the three diff lists is non-empty."""
        buckets = (
            self.records_with_value_diff,
            self.records_only_in_control,
            self.records_only_in_target,
        )
        return any(buckets)
43
+
44
+
45
@dataclass
class StreamComparisonResult:
    """Outcome of one comparison check for a single stream.

    Only ``stream_name`` and ``passed`` are required. The remaining fields are
    optional details that a given check fills in when they apply.
    """

    stream_name: str
    passed: bool
    # Counts reported by the check for the control and target sides.
    control_count: int = 0
    target_count: int = 0
    # Primary-key values found on one side but not the other.
    missing_pks: list[Any] = field(default_factory=list)
    extra_pks: list[Any] = field(default_factory=list)
    # Detailed record differences, when the check computes them.
    record_diff: RecordDiff | None = None
    # Schema differences, when the check computes them.
    schema_diff: dict[str, Any] | None = None
    # Human-readable explanation; empty when there is nothing to report.
    message: str = ""
58
+
59
+
60
@dataclass
class ComparisonResult:
    """Aggregate outcome of a control-versus-target comparison.

    Bundles the overall verdict with the per-stream results and any error or
    warning messages collected along the way.
    """

    passed: bool
    # Per-stream outcomes, keyed by stream name.
    stream_results: dict[str, StreamComparisonResult] = field(default_factory=dict)
    # One-line summary of the overall outcome.
    message: str = ""
    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    @property
    def failed_streams(self) -> list[str]:
        """Return the names of streams whose result did not pass."""
        failing: list[str] = []
        for stream, outcome in self.stream_results.items():
            if not outcome.passed:
                failing.append(stream)
        return failing
75
+
76
+
77
def compare_record_counts(
    control_records: dict[str, list[AirbyteMessage]],
    target_records: dict[str, list[AirbyteMessage]],
) -> ComparisonResult:
    """Compare per-stream record counts between control and target versions.

    This is the first level of regression testing. A stream passes when the
    target produced at least as many records as the control. A stream that is
    present on one side only counts as zero records on the other side.

    Streams are processed in sorted name order so that ``stream_results`` and
    ``errors`` come out in the same order on every run.

    Based on airbyte-ci implementation:
    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/regression_tests/test_read.py#L100-L131

    Args:
        control_records: Messages from the control version, keyed by stream name.
        target_records: Messages from the target version, keyed by stream name.

    Returns:
        A ComparisonResult holding one StreamComparisonResult per stream seen
        on either side. ``errors`` has one message for every stream with fewer
        records in the target than in the control.
    """
    stream_results: dict[str, StreamComparisonResult] = {}
    errors: list[str] = []

    # Iterating the raw set would order results by string hash, which changes
    # from one interpreter run to the next; sorting keeps reports reproducible.
    all_streams = sorted(set(control_records) | set(target_records))

    for stream_name in all_streams:
        control_count = len(control_records.get(stream_name, []))
        target_count = len(target_records.get(stream_name, []))
        delta = target_count - control_count

        # Extra records in the target are tolerated; only a shortfall fails.
        passed = delta >= 0

        message = ""
        if delta > 0:
            message = (
                f"Stream {stream_name} has {delta} more records in target "
                f"({target_count} vs {control_count})"
            )
        elif delta < 0:
            message = (
                f"Stream {stream_name} has {-delta} fewer records in target "
                f"({target_count} vs {control_count})"
            )
            errors.append(message)

        stream_results[stream_name] = StreamComparisonResult(
            stream_name=stream_name,
            passed=passed,
            control_count=control_count,
            target_count=target_count,
            message=message,
        )

    all_passed = all(r.passed for r in stream_results.values())
    return ComparisonResult(
        passed=all_passed,
        stream_results=stream_results,
        message="Record counts match" if all_passed else "Record count mismatch",
        errors=errors,
    )
129
+
130
+
131
def compare_primary_keys(
    control_records: dict[str, list[AirbyteMessage]],
    target_records: dict[str, list[AirbyteMessage]],
    primary_keys_per_stream: dict[str, list[str] | None],
) -> ComparisonResult:
    """Check that every control primary key also shows up in the target.

    Only streams present in ``control_records`` are examined. A stream with no
    primary key fields configured is recorded as passed and a warning is added.
    Otherwise the stream fails when any control primary-key value is absent
    from the target; values found only in the target are reported as extras
    but do not fail the stream.

    Based on airbyte-ci implementation:
    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/regression_tests/test_read.py#L37-L98
    """
    per_stream: dict[str, StreamComparisonResult] = {}
    error_messages: list[str] = []
    warning_messages: list[str] = []

    for stream_name, control_msgs in control_records.items():
        pk_fields = primary_keys_per_stream.get(stream_name)

        if pk_fields:
            control_pks = _extract_pk_values(control_msgs, pk_fields)
            target_pks = _extract_pk_values(
                target_records.get(stream_name, []), pk_fields
            )

            missing = list(control_pks - target_pks)
            extra = list(target_pks - control_pks)

            if missing:
                detail = f"Stream {stream_name} is missing {len(missing)} primary keys in target"
                error_messages.append(detail)
            else:
                detail = ""

            per_stream[stream_name] = StreamComparisonResult(
                stream_name=stream_name,
                passed=not missing,
                control_count=len(control_pks),
                target_count=len(target_pks),
                missing_pks=missing,
                extra_pks=extra,
                message=detail,
            )
        else:
            # Nothing to match on: record the stream as passed, with a warning.
            warning_messages.append(
                f"No primary keys defined for stream {stream_name}, skipping PK check"
            )
            per_stream[stream_name] = StreamComparisonResult(
                stream_name=stream_name,
                passed=True,
                message="Skipped - no primary keys defined",
            )

    everything_passed = all(outcome.passed for outcome in per_stream.values())
    if everything_passed:
        summary = "All primary keys present"
    else:
        summary = "Missing primary keys"

    return ComparisonResult(
        passed=everything_passed,
        stream_results=per_stream,
        message=summary,
        errors=error_messages,
        warnings=warning_messages,
    )
194
+
195
+
196
def compare_all_records(
    control_records: dict[str, list[AirbyteMessage]],
    target_records: dict[str, list[AirbyteMessage]],
    primary_keys_per_stream: dict[str, list[str] | None] | None = None,
    exclude_paths: list[str] | None = None,
) -> ComparisonResult:
    """Compare all records between control and target versions.

    This is the strictest level of regression testing: a stream passes only
    when no record differences are found, ignoring the excluded fields.

    Streams are processed in sorted name order so that ``stream_results`` and
    ``errors`` come out in the same order on every run.

    Based on airbyte-ci implementation:
    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/regression_tests/test_read.py#L133-L183

    Args:
        control_records: Messages from the control version, keyed by stream name.
        target_records: Messages from the target version, keyed by stream name.
        primary_keys_per_stream: Primary key field names per stream. Streams
            with an entry are matched record-by-record on those fields; all
            other streams are compared as unordered collections. Defaults to
            no primary keys for any stream.
        exclude_paths: Record fields to ignore while diffing. Defaults to
            ``EXCLUDE_PATHS``.

    Returns:
        A ComparisonResult with one StreamComparisonResult per stream seen on
        either side, and one entry in ``errors`` for every failing stream.
    """
    if exclude_paths is None:
        exclude_paths = EXCLUDE_PATHS

    if primary_keys_per_stream is None:
        primary_keys_per_stream = {}

    stream_results: dict[str, StreamComparisonResult] = {}
    errors: list[str] = []

    # Iterating the raw set would order results by string hash, which changes
    # from one interpreter run to the next; sorting keeps reports reproducible.
    all_streams = sorted(set(control_records) | set(target_records))

    for stream_name in all_streams:
        control_msgs = control_records.get(stream_name, [])
        target_msgs = target_records.get(stream_name, [])

        # The target has nothing to diff against, so fail without a record diff.
        if control_msgs and not target_msgs:
            message = f"Stream {stream_name} is missing in target version"
            errors.append(message)
            stream_results[stream_name] = StreamComparisonResult(
                stream_name=stream_name,
                passed=False,
                control_count=len(control_msgs),
                target_count=0,
                message=message,
            )
            continue

        pk_fields = primary_keys_per_stream.get(stream_name)
        if pk_fields:
            record_diff = _compare_records_with_pk(
                stream_name=stream_name,
                control_msgs=control_msgs,
                target_msgs=target_msgs,
                pk_fields=pk_fields,
                exclude_paths=exclude_paths,
            )
        else:
            record_diff = _compare_records_without_pk(
                stream_name=stream_name,
                control_msgs=control_msgs,
                target_msgs=target_msgs,
                exclude_paths=exclude_paths,
            )

        passed = not record_diff.has_diff
        message = ""
        if not passed:
            message = f"Stream {stream_name} has record differences"
            errors.append(message)

        stream_results[stream_name] = StreamComparisonResult(
            stream_name=stream_name,
            passed=passed,
            control_count=len(control_msgs),
            target_count=len(target_msgs),
            record_diff=record_diff,
            message=message,
        )

    all_passed = all(r.passed for r in stream_results.values())
    return ComparisonResult(
        passed=all_passed,
        stream_results=stream_results,
        message="All records match" if all_passed else "Record differences found",
        errors=errors,
    )
275
+
276
+
277
def compare_record_schemas(
    control_records: dict[str, list[AirbyteMessage]],
    target_records: dict[str, list[AirbyteMessage]],
) -> ComparisonResult:
    """Compare inferred schemas between control and target versions.

    For each stream, a flat ``field name -> Python type name`` mapping is
    inferred from the FIRST record on each side and the two mappings are
    diffed. Later records are not inspected.

    Streams are processed in sorted name order so that ``stream_results``,
    ``errors`` and ``warnings`` come out in the same order on every run.

    Based on airbyte-ci implementation:
    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/regression_tests/test_read.py#L185-L234

    Args:
        control_records: Messages from the control version, keyed by stream name.
        target_records: Messages from the target version, keyed by stream name.

    Returns:
        A ComparisonResult. Streams without control records only produce a
        warning and get no entry in ``stream_results``. Streams without target
        records fail and are listed in both ``warnings`` and ``errors``.
    """
    stream_results: dict[str, StreamComparisonResult] = {}
    errors: list[str] = []
    warnings: list[str] = []

    # Iterating the raw set would order results by string hash, which changes
    # from one interpreter run to the next; sorting keeps reports reproducible.
    all_streams = sorted(set(control_records) | set(target_records))

    for stream_name in all_streams:
        control_msgs = control_records.get(stream_name, [])
        target_msgs = target_records.get(stream_name, [])

        if not control_msgs:
            # No baseline to compare against: warn, but record no result.
            warnings.append(f"Stream {stream_name} has no records in control version")
            continue

        if not target_msgs:
            warnings.append(f"Stream {stream_name} has no records in target version")
            stream_results[stream_name] = StreamComparisonResult(
                stream_name=stream_name,
                passed=False,
                message=f"Stream {stream_name} has no records in target version",
            )
            errors.append(f"Stream {stream_name} has no records in target version")
            continue

        # Infer schema from first record of each
        control_schema = _infer_schema_from_record(control_msgs[0])
        target_schema = _infer_schema_from_record(target_msgs[0])

        diff = DeepDiff(
            control_schema,
            target_schema,
            ignore_order=True,
        )

        passed = not diff
        schema_diff = diff.to_dict() if diff else None

        message = ""
        if not passed:
            message = f"Stream {stream_name} has schema differences"
            errors.append(message)

        stream_results[stream_name] = StreamComparisonResult(
            stream_name=stream_name,
            passed=passed,
            control_count=len(control_msgs),
            target_count=len(target_msgs),
            schema_diff=schema_diff,
            message=message,
        )

    all_passed = all(r.passed for r in stream_results.values())
    return ComparisonResult(
        passed=all_passed,
        stream_results=stream_results,
        message="All schemas match" if all_passed else "Schema differences found",
        errors=errors,
        warnings=warnings,
    )
348
+
349
+
350
+ def _extract_pk_values(
351
+ messages: list[AirbyteMessage],
352
+ pk_fields: list[str],
353
+ ) -> set[tuple]:
354
+ """Extract primary key values from a list of messages."""
355
+ pk_values: set[tuple] = set()
356
+ for msg in messages:
357
+ if msg.record and msg.record.data:
358
+ pk_tuple = tuple(msg.record.data.get(field) for field in pk_fields)
359
+ pk_values.add(pk_tuple)
360
+ return pk_values
361
+
362
+
363
def _compare_records_with_pk(
    stream_name: str,
    control_msgs: list[AirbyteMessage],
    target_msgs: list[AirbyteMessage],
    pk_fields: list[str],
    exclude_paths: list[str],
) -> RecordDiff:
    """Diff two sets of records by pairing them on their primary key.

    Records are keyed by the tuple of their ``pk_fields`` values. Keys found
    on one side only land in the "only in" lists; keys found on both sides are
    diffed with the ``exclude_paths`` fields ignored. Messages without a
    record or with empty data are skipped, and when several records share a
    key the last one seen is the one kept.
    """

    def index_by_pk(msgs: list[AirbyteMessage]) -> dict[tuple, dict]:
        # Map each primary-key tuple to the JSON-decoded record carrying it.
        indexed: dict[tuple, dict] = {}
        for message in msgs:
            if message.record and message.record.data:
                key = tuple(message.record.data.get(name) for name in pk_fields)
                indexed[key] = json.loads(message.record.model_dump_json())
        return indexed

    control_by_pk = index_by_pk(control_msgs)
    target_by_pk = index_by_pk(target_msgs)

    control_pks = set(control_by_pk.keys())
    target_pks = set(target_by_pk.keys())

    only_control = [control_by_pk[key] for key in (control_pks - target_pks)]
    only_target = [target_by_pk[key] for key in (target_pks - control_pks)]

    # Excluded fields sit at the top level of each serialized record.
    ignored = [f"root['{p}']" for p in exclude_paths]

    changed = []
    for key in control_pks & target_pks:
        before = control_by_pk[key]
        after = target_by_pk[key]
        delta = DeepDiff(before, after, ignore_order=True, exclude_paths=ignored)
        if not delta:
            continue
        changed.append(
            {
                "pk": key,
                "control": before,
                "target": after,
                "diff": delta.to_dict(),
            }
        )

    return RecordDiff(
        stream_name=stream_name,
        records_with_value_diff=changed,
        records_only_in_control=only_control,
        records_only_in_target=only_target,
    )
421
+
422
+
423
def _compare_records_without_pk(
    stream_name: str,
    control_msgs: list[AirbyteMessage],
    target_msgs: list[AirbyteMessage],
    exclude_paths: list[str],
) -> RecordDiff:
    """Compare records without primary keys (order-independent comparison).

    Both sides are serialized to lists of JSON-decoded records and diffed as
    unordered collections, ignoring the ``exclude_paths`` fields of every
    record.

    Args:
        stream_name: Name of the stream being compared.
        control_msgs: Messages from the control version; those without a
            record are skipped.
        target_msgs: Messages from the target version; those without a
            record are skipped.
        exclude_paths: Top-level record fields (e.g. ``emitted_at``) to ignore.

    Returns:
        A RecordDiff whose ``records_with_value_diff`` holds a single
        ``{"diff": ...}`` entry when the two collections differ, and is empty
        otherwise. The "only in" lists are never populated by this function.
    """
    # Imported locally: this function is the only user of `re` visible here.
    import re

    control_records = [
        json.loads(msg.record.model_dump_json()) for msg in control_msgs if msg.record
    ]
    target_records = [
        json.loads(msg.record.model_dump_json()) for msg in target_msgs if msg.record
    ]

    # DeepDiff's `exclude_paths` takes literal paths, so a wildcard such as
    # "root[*]['emitted_at']" never matches "root[0]['emitted_at']" and the
    # field would still be compared. A regex over the list index excludes the
    # field on every record.
    excluded_field_patterns = [
        rf"^root\[\d+\]\['{re.escape(p)}'\]$" for p in exclude_paths
    ]

    diff = DeepDiff(
        control_records,
        target_records,
        ignore_order=True,
        exclude_regex_paths=excluded_field_patterns or None,
    )

    records_with_value_diff = []
    if diff:
        records_with_value_diff.append(
            {
                "diff": diff.to_dict(),
            }
        )

    return RecordDiff(
        stream_name=stream_name,
        records_with_value_diff=records_with_value_diff,
    )
456
+
457
+
458
+ def _infer_schema_from_record(message: AirbyteMessage) -> dict[str, str]:
459
+ """Infer a simple schema (field -> type) from a record."""
460
+ if not message.record or not message.record.data:
461
+ return {}
462
+
463
+ schema: dict[str, str] = {}
464
+ for key, value in message.record.data.items():
465
+ schema[key] = type(value).__name__
466
+ return schema
@@ -0,0 +1,154 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Stream schema generation utilities for live tests.
3
+
4
+ This module provides functions for inferring JSON schemas from Airbyte
5
+ record messages, useful for comparing schemas between connector versions.
6
+
7
+ Based on airbyte-ci implementation:
8
+ https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/models.py#L355-L366
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from airbyte_protocol.models import AirbyteMessage
19
+ from airbyte_protocol.models import Type as AirbyteMessageType
20
+ from genson import SchemaBuilder
21
+
22
+ from airbyte_ops_mcp.live_tests.obfuscation import convert_obfuscated_record_to_typed
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def sort_dict_keys(d: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``d`` with dictionary keys sorted at every depth.

    Lists are walked so that dictionaries nested inside them are sorted too;
    the order of the list items themselves is kept. Any other value is
    returned unchanged.
    """
    if isinstance(d, list):
        return [sort_dict_keys(element) for element in d]
    if not isinstance(d, dict):
        return d

    ordered = {}
    for key, value in sorted(d.items()):
        ordered[key] = sort_dict_keys(value)
    return ordered
34
+
35
+
36
def generate_schema_from_records(
    records: list[dict[str, Any]],
    obfuscated: bool = False,
) -> dict[str, Any]:
    """Infer one JSON schema covering all of the given record dictionaries.

    The builder is seeded with an empty object schema before any record is
    added. When ``obfuscated`` is set, each record is first passed through
    ``convert_obfuscated_record_to_typed``. The resulting schema is returned
    with its keys sorted at every depth.
    """
    schema_builder = SchemaBuilder()
    schema_builder.add_schema({"type": "object", "properties": {}})

    for raw in records:
        typed = convert_obfuscated_record_to_typed(raw) if obfuscated else raw
        schema_builder.add_object(typed)

    inferred = schema_builder.to_schema()
    return sort_dict_keys(inferred)
54
+
55
+
56
def generate_stream_schemas(
    messages: list[AirbyteMessage],
    obfuscated: bool = False,
) -> dict[str, dict[str, Any]]:
    """Infer a JSON schema per stream from the RECORD messages given.

    Messages that are not of type RECORD, or that carry no record data, are
    ignored. Every remaining record feeds the schema builder of its stream.
    When ``obfuscated`` is set, record data is first passed through
    ``convert_obfuscated_record_to_typed``.

    Based on airbyte-ci implementation:
    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/models.py#L355-L366
    """
    logger.info("Generating stream schemas")
    builders_by_stream: dict[str, SchemaBuilder] = {}

    for message in messages:
        is_usable_record = (
            message.type == AirbyteMessageType.RECORD
            and message.record
            and message.record.data
        )
        if not is_usable_record:
            continue

        stream = message.record.stream
        builder = builders_by_stream.get(stream)
        if builder is None:
            # First record of this stream: start from an empty object schema.
            builder = SchemaBuilder()
            builder.add_schema({"type": "object", "properties": {}})
            builders_by_stream[stream] = builder

        data = message.record.data
        if obfuscated:
            data = convert_obfuscated_record_to_typed(data)
        builder.add_object(data)

    logger.info("Stream schemas generated")
    return {
        stream: sort_dict_keys(builder.to_schema())
        for stream, builder in builders_by_stream.items()
    }
93
+
94
+
95
def save_stream_schemas(
    schemas: dict[str, dict[str, Any]],
    output_dir: Path,
) -> None:
    """Write each stream's schema to ``<output_dir>/<sanitized name>.json``.

    The directory (and any missing parents) is created first. File names are
    derived from stream names by replacing every character other than ASCII
    letters, digits and underscore with an underscore.

    Based on airbyte-ci implementation:
    https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/live-tests/src/live_tests/commons/models.py#L456-L462
    """
    import re

    unsafe_chars = re.compile(r"[^a-zA-Z0-9_]")

    output_dir.mkdir(parents=True, exist_ok=True)

    for stream_name, schema in schemas.items():
        safe_name = unsafe_chars.sub("_", stream_name)
        serialized = json.dumps(schema, sort_keys=True, indent=2)
        (output_dir / f"{safe_name}.json").write_text(serialized)

    logger.info(f"Stream schemas saved to {output_dir}")
120
+
121
+
122
def compare_stream_schemas(
    control_schemas: dict[str, dict[str, Any]],
    target_schemas: dict[str, dict[str, Any]],
) -> dict[str, dict[str, Any]]:
    """Compare schemas between control and target versions.

    Streams are processed in sorted name order so the returned dictionary is
    ordered the same way on every run.

    Args:
        control_schemas: Schemas from the control version, keyed by stream name.
        target_schemas: Schemas from the target version, keyed by stream name.

    Returns:
        A dictionary with one entry per stream whose schema differs; streams
        with identical schemas are left out. Each entry has a ``status``:

        * ``"new_stream"``: control schema missing or empty; carries ``schema``.
        * ``"removed_stream"``: target schema missing or empty; carries
          ``schema``.
        * ``"changed"``: both present but different; carries ``diff``,
          ``control_schema`` and ``target_schema``.
    """
    from deepdiff import DeepDiff

    differences: dict[str, dict[str, Any]] = {}

    # Iterating the raw set would order results by string hash, which changes
    # from one interpreter run to the next; sorting keeps reports reproducible.
    all_streams = sorted(set(control_schemas) | set(target_schemas))

    for stream in all_streams:
        control_schema = control_schemas.get(stream, {})
        target_schema = target_schemas.get(stream, {})

        if not control_schema and target_schema:
            differences[stream] = {"status": "new_stream", "schema": target_schema}
        elif control_schema and not target_schema:
            differences[stream] = {"status": "removed_stream", "schema": control_schema}
        else:
            diff = DeepDiff(control_schema, target_schema, ignore_order=True)
            if diff:
                differences[stream] = {
                    "status": "changed",
                    "diff": diff.to_dict(),
                    "control_schema": control_schema,
                    "target_schema": target_schema,
                }

    return differences
@@ -0,0 +1,43 @@
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+ """Validation functions for connector output.
3
+
4
+ This module provides validation functions for verifying connector output
5
+ conforms to the Airbyte protocol and best practices.
6
+
7
+ Based on airbyte-ci validation tests:
8
+ https://github.com/airbytehq/airbyte/tree/master/airbyte-ci/connectors/live-tests/src/live_tests/validation_tests
9
+ """
10
+
11
+ from airbyte_ops_mcp.live_tests.validation.catalog_validators import (
12
+ ValidationResult,
13
+ validate_additional_properties_is_true,
14
+ validate_catalog,
15
+ validate_catalog_has_streams,
16
+ validate_cursors_exist_in_schema,
17
+ validate_no_duplicate_stream_names,
18
+ validate_no_unresolved_refs,
19
+ validate_primary_keys_exist_in_schema,
20
+ validate_schemas_are_valid_json_schema,
21
+ validate_streams_have_sync_modes,
22
+ )
23
+ from airbyte_ops_mcp.live_tests.validation.record_validators import (
24
+ validate_primary_keys_in_records,
25
+ validate_records_conform_to_schema,
26
+ validate_state_messages_emitted,
27
+ )
28
+
29
# Public API of the validation package: every name imported above from
# catalog_validators and record_validators is re-exported here.
__all__ = [
    "ValidationResult",
    "validate_additional_properties_is_true",
    "validate_catalog",
    "validate_catalog_has_streams",
    "validate_cursors_exist_in_schema",
    "validate_no_duplicate_stream_names",
    "validate_no_unresolved_refs",
    "validate_primary_keys_exist_in_schema",
    "validate_primary_keys_in_records",
    "validate_records_conform_to_schema",
    "validate_schemas_are_valid_json_schema",
    "validate_state_messages_emitted",
    "validate_streams_have_sync_modes",
]