airbyte-agent-airtable 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-agent-airtable might be problematic. Click here for more details.

Files changed (58) hide show
  1. airbyte_agent_airtable/__init__.py +81 -0
  2. airbyte_agent_airtable/_vendored/__init__.py +1 -0
  3. airbyte_agent_airtable/_vendored/connector_sdk/__init__.py +82 -0
  4. airbyte_agent_airtable/_vendored/connector_sdk/auth_strategies.py +1171 -0
  5. airbyte_agent_airtable/_vendored/connector_sdk/auth_template.py +135 -0
  6. airbyte_agent_airtable/_vendored/connector_sdk/cloud_utils/__init__.py +5 -0
  7. airbyte_agent_airtable/_vendored/connector_sdk/cloud_utils/client.py +338 -0
  8. airbyte_agent_airtable/_vendored/connector_sdk/connector_model_loader.py +1121 -0
  9. airbyte_agent_airtable/_vendored/connector_sdk/constants.py +78 -0
  10. airbyte_agent_airtable/_vendored/connector_sdk/exceptions.py +23 -0
  11. airbyte_agent_airtable/_vendored/connector_sdk/executor/__init__.py +31 -0
  12. airbyte_agent_airtable/_vendored/connector_sdk/executor/hosted_executor.py +230 -0
  13. airbyte_agent_airtable/_vendored/connector_sdk/executor/local_executor.py +1848 -0
  14. airbyte_agent_airtable/_vendored/connector_sdk/executor/models.py +202 -0
  15. airbyte_agent_airtable/_vendored/connector_sdk/extensions.py +693 -0
  16. airbyte_agent_airtable/_vendored/connector_sdk/http/__init__.py +37 -0
  17. airbyte_agent_airtable/_vendored/connector_sdk/http/adapters/__init__.py +9 -0
  18. airbyte_agent_airtable/_vendored/connector_sdk/http/adapters/httpx_adapter.py +260 -0
  19. airbyte_agent_airtable/_vendored/connector_sdk/http/config.py +98 -0
  20. airbyte_agent_airtable/_vendored/connector_sdk/http/exceptions.py +119 -0
  21. airbyte_agent_airtable/_vendored/connector_sdk/http/protocols.py +114 -0
  22. airbyte_agent_airtable/_vendored/connector_sdk/http/response.py +104 -0
  23. airbyte_agent_airtable/_vendored/connector_sdk/http_client.py +693 -0
  24. airbyte_agent_airtable/_vendored/connector_sdk/introspection.py +481 -0
  25. airbyte_agent_airtable/_vendored/connector_sdk/logging/__init__.py +11 -0
  26. airbyte_agent_airtable/_vendored/connector_sdk/logging/logger.py +273 -0
  27. airbyte_agent_airtable/_vendored/connector_sdk/logging/types.py +93 -0
  28. airbyte_agent_airtable/_vendored/connector_sdk/observability/__init__.py +11 -0
  29. airbyte_agent_airtable/_vendored/connector_sdk/observability/config.py +179 -0
  30. airbyte_agent_airtable/_vendored/connector_sdk/observability/models.py +19 -0
  31. airbyte_agent_airtable/_vendored/connector_sdk/observability/redactor.py +81 -0
  32. airbyte_agent_airtable/_vendored/connector_sdk/observability/session.py +103 -0
  33. airbyte_agent_airtable/_vendored/connector_sdk/performance/__init__.py +6 -0
  34. airbyte_agent_airtable/_vendored/connector_sdk/performance/instrumentation.py +57 -0
  35. airbyte_agent_airtable/_vendored/connector_sdk/performance/metrics.py +93 -0
  36. airbyte_agent_airtable/_vendored/connector_sdk/schema/__init__.py +75 -0
  37. airbyte_agent_airtable/_vendored/connector_sdk/schema/base.py +212 -0
  38. airbyte_agent_airtable/_vendored/connector_sdk/schema/components.py +244 -0
  39. airbyte_agent_airtable/_vendored/connector_sdk/schema/connector.py +120 -0
  40. airbyte_agent_airtable/_vendored/connector_sdk/schema/extensions.py +301 -0
  41. airbyte_agent_airtable/_vendored/connector_sdk/schema/operations.py +156 -0
  42. airbyte_agent_airtable/_vendored/connector_sdk/schema/security.py +241 -0
  43. airbyte_agent_airtable/_vendored/connector_sdk/secrets.py +182 -0
  44. airbyte_agent_airtable/_vendored/connector_sdk/telemetry/__init__.py +10 -0
  45. airbyte_agent_airtable/_vendored/connector_sdk/telemetry/config.py +32 -0
  46. airbyte_agent_airtable/_vendored/connector_sdk/telemetry/events.py +59 -0
  47. airbyte_agent_airtable/_vendored/connector_sdk/telemetry/tracker.py +155 -0
  48. airbyte_agent_airtable/_vendored/connector_sdk/types.py +274 -0
  49. airbyte_agent_airtable/_vendored/connector_sdk/utils.py +127 -0
  50. airbyte_agent_airtable/_vendored/connector_sdk/validation.py +997 -0
  51. airbyte_agent_airtable/_vendored/connector_sdk/validation_replication.py +970 -0
  52. airbyte_agent_airtable/connector.py +834 -0
  53. airbyte_agent_airtable/connector_model.py +365 -0
  54. airbyte_agent_airtable/models.py +219 -0
  55. airbyte_agent_airtable/types.py +367 -0
  56. airbyte_agent_airtable-0.1.5.dist-info/METADATA +140 -0
  57. airbyte_agent_airtable-0.1.5.dist-info/RECORD +58 -0
  58. airbyte_agent_airtable-0.1.5.dist-info/WHEEL +4 -0
@@ -0,0 +1,997 @@
1
+ """
2
+ Validation tools for connector readiness and schema compliance.
3
+
4
+ These tools help ensure that connectors are ready to ship by:
5
+ - Checking that all entity/action operations have corresponding test cassettes
6
+ - Validating that response schemas match the actual cassette responses
7
+ - Detecting fields present in responses but not declared in schemas
8
+ - Validating replication compatibility with Airbyte source connectors
9
+ """
10
+
11
+ from collections import defaultdict
12
+ from pathlib import Path
13
+ from typing import Any, Dict, List, Tuple
14
+
15
+ import jsonschema
16
+ import yaml
17
+ from jsonpath_ng import parse as parse_jsonpath
18
+
19
+ from .connector_model_loader import (
20
+ ConnectorModelLoaderError,
21
+ load_connector_model,
22
+ )
23
+ from .testing.spec_loader import load_test_spec
24
+ from .types import Action, ConnectorModel, EndpointDefinition
25
+ from .utils import infer_auth_scheme_name
26
+ from .validation_replication import validate_replication_compatibility
27
+
28
+
29
def build_cassette_map(cassettes_dir: Path) -> Dict[Tuple[str, str], List[Path]]:
    """Build a map of (entity, action) -> list of cassette paths.

    The entity/action pair comes from the parsed TestSpec's entity and action
    fields, never from the cassette filename.

    Args:
        cassettes_dir: Directory containing cassette YAML files

    Returns:
        Dictionary mapping (entity, action) tuples to lists of cassette file paths
    """
    if not (cassettes_dir.exists() and cassettes_dir.is_dir()):
        return {}

    mapping: Dict[Tuple[str, str], List[Path]] = {}
    for cassette_path in cassettes_dir.glob("*.yaml"):
        try:
            spec = load_test_spec(cassette_path, auth_config={})
            # Key on the spec's declared entity/action, not the filename.
            mapping.setdefault((spec.entity, spec.action), []).append(cassette_path)
        except Exception:
            # Unparseable cassettes are skipped; other checks will surface them.
            continue

    return mapping
55
+
56
+
57
+ def build_auth_scheme_coverage(
58
+ cassettes_dir: Path,
59
+ auth_options: list | None = None,
60
+ ) -> Tuple[Dict[str | None, List[Path]], List[Tuple[Path, set[str]]]]:
61
+ """Build a map of auth_scheme -> list of cassette paths.
62
+
63
+ For multi-auth connectors, infers the auth scheme from the cassette's auth_config
64
+ keys using the same matching logic as the executor.
65
+
66
+ Args:
67
+ cassettes_dir: Directory containing cassette YAML files
68
+ auth_options: List of AuthOption from the connector model (for inference)
69
+
70
+ Returns:
71
+ Tuple of:
72
+ - Dictionary mapping auth_scheme names (or None for single-auth) to cassette paths
73
+ - List of (cassette_path, auth_config_keys) for cassettes that couldn't be matched
74
+ """
75
+ auth_scheme_map: Dict[str | None, List[Path]] = defaultdict(list)
76
+ unmatched_cassettes: List[Tuple[Path, set[str]]] = []
77
+
78
+ if not cassettes_dir.exists() or not cassettes_dir.is_dir():
79
+ return {}, []
80
+
81
+ for cassette_file in cassettes_dir.glob("*.yaml"):
82
+ try:
83
+ spec = load_test_spec(cassette_file, auth_config={})
84
+
85
+ # First, check if auth_scheme is explicitly set in the cassette
86
+ if spec.auth_scheme:
87
+ auth_scheme_map[spec.auth_scheme].append(cassette_file)
88
+ # Otherwise, try to infer from auth_config keys
89
+ elif spec.auth_config and auth_options:
90
+ auth_config_keys = set(spec.auth_config.keys())
91
+ inferred_scheme = infer_auth_scheme_name(auth_config_keys, auth_options)
92
+ if inferred_scheme is not None:
93
+ auth_scheme_map[inferred_scheme].append(cassette_file)
94
+ else:
95
+ # Couldn't infer - track as unmatched
96
+ unmatched_cassettes.append((cassette_file, auth_config_keys))
97
+ else:
98
+ # No auth_scheme and no auth_config - treat as None
99
+ auth_scheme_map[None].append(cassette_file)
100
+ except Exception:
101
+ continue
102
+
103
+ return dict(auth_scheme_map), unmatched_cassettes
104
+
105
+
106
def validate_auth_scheme_coverage(
    config: ConnectorModel,
    cassettes_dir: Path,
) -> Tuple[bool, List[str], List[str], List[str], List[Tuple[Path, set[str]]]]:
    """Validate that each auth scheme has at least one cassette.

    Multi-auth connectors need coverage for every defined auth scheme unless
    the scheme is marked with x-airbyte-untested: true. Single-auth connectors
    are skipped here because the per-operation cassette checks suffice.

    Args:
        config: Loaded connector model
        cassettes_dir: Directory containing cassette files

    Returns:
        Tuple of (is_valid, errors, warnings, covered_schemes, unmatched_cassettes)
    """
    errors: List[str] = []
    warnings: List[str] = []

    if not config.auth.is_multi_auth():
        # Single-auth: nothing scheme-specific to verify.
        return True, errors, warnings, [], []

    options = config.auth.options or []

    # Coverage inferred from cassettes; options enable auth_config key matching.
    coverage, unmatched_cassettes = build_auth_scheme_coverage(cassettes_dir, options)
    covered = {name for name in coverage if name is not None}
    tested = {opt.scheme_name for opt in options if not opt.untested}
    untested = {opt.scheme_name for opt in options if opt.untested}

    # Tested schemes with no cassette are hard errors.
    missing_tested = sorted(tested - covered)
    for scheme in missing_tested:
        errors.append(
            f"Auth scheme '{scheme}' has no cassette coverage. "
            f"Record at least one cassette using this authentication method, "
            f"or add 'x-airbyte-untested: true' to skip this check."
        )

    # Untested schemes with no cassette only warrant a warning.
    for scheme in sorted(untested - covered):
        warnings.append(
            f"Auth scheme '{scheme}' is marked as untested (x-airbyte-untested: true) "
            f"and has no cassette coverage. Validation skipped."
        )

    # Cassettes whose auth_config keys matched no scheme.
    for cassette_path, auth_config_keys in unmatched_cassettes:
        warnings.append(
            f"Cassette '{cassette_path.name}' could not be matched to any auth scheme. "
            f"auth_config keys: {sorted(auth_config_keys)}"
        )

    return not missing_tested, errors, warnings, sorted(covered), unmatched_cassettes
161
+
162
+
163
def validate_response_against_schema(response_body: Any, schema: Dict[str, Any]) -> Tuple[bool, List[str]]:
    """Validate a response body against a JSON schema.

    Args:
        response_body: The response body to validate (usually a dict or list)
        schema: JSON schema to validate against

    Returns:
        Tuple of (is_valid, list_of_error_messages)
    """
    if not schema:
        # An absent or empty schema imposes no constraints.
        return True, []

    try:
        jsonschema.validate(instance=response_body, schema=schema)
    except jsonschema.ValidationError as exc:
        where = ".".join(str(part) for part in exc.path)
        return False, [f"{exc.message} at path: {where}"]
    except jsonschema.SchemaError as exc:
        return False, [f"Invalid schema: {exc.message}"]
    except Exception as exc:
        return False, [f"Validation error: {str(exc)}"]

    return True, []
186
+
187
+
188
def find_undeclared_fields(response_body: Any, schema: Dict[str, Any], path: str = "") -> List[str]:
    """Find fields present in response but not declared in schema.

    Args:
        response_body: The response body to check
        schema: JSON schema to check against
        path: Current path in the object (for recursive calls)

    Returns:
        List of paths to undeclared fields with array indices normalized
        (e.g., ["data.items[].extra_field"] instead of reporting for each element)
    """
    if not schema:
        return []

    found: List[str] = []

    if isinstance(response_body, dict) and schema.get("type") == "object":
        declared = schema.get("properties", {})
        extra_policy = schema.get("additionalProperties", True)

        for key, value in response_body.items():
            child_path = f"{path}.{key}" if path else key

            if key in declared:
                # Declared field: recurse into its own schema.
                found.extend(find_undeclared_fields(value, declared[key], child_path))
            elif extra_policy is False or extra_policy is True or extra_policy == {}:
                # Not declared and additionalProperties gives no sub-schema:
                # report the field (even when extras are nominally allowed).
                found.append(child_path)
            elif isinstance(extra_policy, dict):
                # additionalProperties carries a schema: validate against it.
                found.extend(find_undeclared_fields(value, extra_policy, child_path))

    elif isinstance(response_body, list) and schema.get("type") == "array":
        if response_body:
            # Only the first element is inspected; the "[]" suffix normalizes
            # the path so one report covers every element.
            found.extend(
                find_undeclared_fields(response_body[0], schema.get("items", {}), f"{path}[]")
            )

    else:
        # Union schemas: a field is undeclared only if no branch declares it.
        for union_key in ("anyOf", "oneOf", "allOf"):
            if union_key in schema:
                per_branch = [
                    set(find_undeclared_fields(response_body, sub_schema, path))
                    for sub_schema in schema[union_key]
                ]
                if per_branch:
                    found.extend(set.intersection(*per_branch))
                break

    return found
245
+
246
+
247
+ def _extract_field_from_jsonpath(jsonpath_expr: str) -> str | None:
248
+ """Extract the full field path from a JSONPath expression.
249
+
250
+ Examples:
251
+ $.users -> "users"
252
+ $.data -> "data"
253
+ $.data.items -> "data.items" (returns full path)
254
+ $.data.repository -> "data.repository" (returns full path)
255
+ $.calls[0] -> "calls"
256
+
257
+ Args:
258
+ jsonpath_expr: JSONPath expression (e.g., "$.users" or "$.data.repository")
259
+
260
+ Returns:
261
+ Full field path or None if cannot parse
262
+ """
263
+ # Remove leading $. or $
264
+ expr = jsonpath_expr.strip()
265
+ if expr.startswith("$."):
266
+ expr = expr[2:]
267
+ elif expr.startswith("$"):
268
+ expr = expr[1:]
269
+
270
+ # Remove array indices and wildcards
271
+ expr = expr.replace("[0]", "").replace("[]", "").replace("[*]", "")
272
+
273
+ # Return the full path (not just the first segment)
274
+ return expr if expr else None
275
+
276
+
277
+ def _is_dynamic_object(schema: Dict[str, Any]) -> bool:
278
+ """Check if a schema is a dynamic object (inline object with no declared properties).
279
+
280
+ Dynamic objects are flexible schemas that allow arbitrary keys, indicated by:
281
+ - type: object
282
+ - No properties defined (or empty properties)
283
+ - This pattern is used for custom objects, arbitrary key-value stores, etc.
284
+
285
+ Args:
286
+ schema: Schema to check
287
+
288
+ Returns:
289
+ True if schema is a dynamic object (no declared properties)
290
+ """
291
+ if not isinstance(schema, dict):
292
+ return False
293
+
294
+ # Must be an object type
295
+ if schema.get("type") != "object":
296
+ return False
297
+
298
+ # Check if properties are missing or empty
299
+ properties = schema.get("properties", {})
300
+ return not properties
301
+
302
+
303
+ def _schema_has_ref_or_dynamic(field_schema: Dict[str, Any]) -> bool:
304
+ """Check if a schema field uses $ref or is a dynamic object.
305
+
306
+ For array fields, checks if the array items use $ref or are dynamic objects.
307
+ For object fields, checks if the field itself uses $ref or is a dynamic object.
308
+
309
+ Args:
310
+ field_schema: Schema to check
311
+
312
+ Returns:
313
+ True if field uses $ref, is a dynamic object, or contains items with $ref/dynamic objects
314
+ """
315
+ # If the field itself has a $ref, that's good
316
+ if "$ref" in field_schema:
317
+ return True
318
+
319
+ # If the field is a dynamic object (no declared properties), that's also acceptable
320
+ if _is_dynamic_object(field_schema):
321
+ return True
322
+
323
+ # If the field is an array, check if items have a $ref or are dynamic objects
324
+ if field_schema.get("type") == "array" and "items" in field_schema:
325
+ items_schema = field_schema["items"]
326
+ return "$ref" in items_schema or _is_dynamic_object(items_schema)
327
+
328
+ return False
329
+
330
+
331
+ def _check_field_has_ref_or_dynamic(field_path: str, schema: Dict[str, Any]) -> bool:
332
+ """Check if a field in the schema uses $ref or is a dynamic object.
333
+
334
+ For array fields, checks if the array items use $ref or are dynamic objects.
335
+ For object fields, checks if the field itself uses $ref or is a dynamic object.
336
+
337
+ Dynamic objects (inline objects with no declared properties) are allowed because
338
+ they represent flexible schemas like custom objects or arbitrary key-value stores.
339
+
340
+ Args:
341
+ field_path: Field path (e.g., "users" or "data")
342
+ schema: Response schema (unresolved, to check for $ref presence)
343
+
344
+ Returns:
345
+ True if field uses $ref, is a dynamic object, or contains items with $ref/dynamic objects
346
+ """
347
+ if not schema or not isinstance(schema, dict):
348
+ return False
349
+
350
+ # For simple field (e.g., "users")
351
+ if "." not in field_path:
352
+ if "properties" in schema:
353
+ field_schema = schema["properties"].get(field_path, {})
354
+ return _schema_has_ref_or_dynamic(field_schema)
355
+ return False
356
+
357
+ # For nested field (e.g., "data.items"), traverse
358
+ parts = field_path.split(".")
359
+ current = schema
360
+
361
+ for part in parts:
362
+ if not isinstance(current, dict):
363
+ return False
364
+
365
+ # Handle array types by descending into items first
366
+ if current.get("type") == "array" and "items" in current:
367
+ current = current["items"]
368
+ if not isinstance(current, dict):
369
+ return False
370
+
371
+ if "properties" in current:
372
+ current = current["properties"].get(part, {})
373
+ else:
374
+ return False
375
+
376
+ # Check if the final field has a $ref, is a dynamic object, or is an array with items that qualify
377
+ return _schema_has_ref_or_dynamic(current)
378
+
379
+
380
def _check_field_in_schema(jsonpath_expr: str, schema: Dict[str, Any]) -> bool:
    """Check if a JSONPath expression corresponds to fields in schema.

    Args:
        jsonpath_expr: JSONPath (e.g., "$.records", "$.pagination.cursor")
        schema: Response schema

    Returns:
        True if path exists in schema, False otherwise
    """
    dotted = _extract_field_from_jsonpath(jsonpath_expr)
    if not (dotted and schema):
        return False

    # Walk the schema one dotted segment at a time.
    node: Any = schema
    for segment in dotted.split("."):
        if not isinstance(node, dict):
            return False

        props = node.get("properties", {})
        if segment in props:
            node = props[segment]
            continue

        # Dynamic schemas: cannot validate further, accept the path.
        return "additionalProperties" in node

    return True
416
+
417
+
418
def validate_record_extractor_has_ref(
    endpoint: EndpointDefinition,
    raw_spec: Dict[str, Any],
    entity_name: str,
    action: str,
) -> Tuple[bool, List[str], List[str]]:
    """
    Validate that x-airbyte-record-extractor points to a field with a $ref or dynamic object.

    The extractor's JSONPath should land on a response-schema field that either:
    1. Uses a $ref for proper typing with named schemas, OR
    2. Is a dynamic object (type: object with no declared properties) for
       intentionally flexible payloads such as custom objects or arbitrary
       key-value stores.

    Args:
        endpoint: The endpoint definition with response_schema
        raw_spec: Raw OpenAPI spec dict (unresolved) to check for $refs
        entity_name: Entity name (for error messages)
        action: Action name (for error messages)

    Returns:
        Tuple of (is_valid, errors, warnings)
        - is_valid: False if extractor points to inline schema with declared properties
        - errors: List of error messages
        - warnings: List of warning messages
    """
    errors: List[str] = []
    warnings: List[str] = []

    if not endpoint.record_extractor:
        # Nothing to validate.
        return True, errors, warnings

    if not endpoint.response_schema:
        warnings.append(f"{entity_name}.{action}: Has x-airbyte-record-extractor but no response schema defined")
        return True, errors, warnings

    extractor_path = endpoint.record_extractor

    try:
        # Reduce the JSONPath to a dotted field path (e.g., "$.users" -> "users").
        target_field = _extract_field_from_jsonpath(extractor_path)
        if not target_field:
            warnings.append(f"{entity_name}.{action}: Cannot parse x-airbyte-record-extractor JSONPath: {extractor_path}")
            return True, errors, warnings

        # Inspect the raw (unresolved) spec so $refs are still visible.
        unresolved_schema = _find_unresolved_response_schema(raw_spec, endpoint.path, endpoint.method)
        if not unresolved_schema:
            # Raw schema not locatable; skip rather than guess.
            return True, errors, warnings

        if not _check_field_has_ref_or_dynamic(target_field, unresolved_schema):
            errors.append(
                f"{entity_name}.{action}: x-airbyte-record-extractor '{extractor_path}' points to field "
                f"'{target_field}' which uses an inline schema with declared properties. Records should "
                f"reference a named schema (e.g., $ref: '#/components/schemas/{target_field.title()}') for proper typing."
            )
            return False, errors, warnings

    except Exception as e:
        # Validation itself failed; report as a warning, not a blocker.
        warnings.append(f"{entity_name}.{action}: Error validating x-airbyte-record-extractor: {str(e)}")

    return True, errors, warnings
503
+
504
+
505
+ def _find_unresolved_response_schema(raw_spec: Dict[str, Any], path: str, method: str) -> Dict[str, Any] | None:
506
+ """Find the unresolved response schema from the raw OpenAPI spec.
507
+
508
+ If the response schema itself is a $ref, resolves it once to get the actual schema.
509
+ This allows us to check the properties within that schema for nested $refs.
510
+
511
+ Args:
512
+ raw_spec: Raw OpenAPI spec dict
513
+ path: API path (e.g., "/v1/users")
514
+ method: HTTP method (e.g., "GET")
515
+
516
+ Returns:
517
+ Unresolved response schema dict or None if not found
518
+ """
519
+ try:
520
+ paths = raw_spec.get("paths", {})
521
+ if path not in paths:
522
+ return None
523
+
524
+ path_item = paths[path]
525
+ operation = path_item.get(method.lower())
526
+ if not operation:
527
+ return None
528
+
529
+ responses = operation.get("responses", {})
530
+ response_200 = responses.get("200")
531
+ if not response_200:
532
+ return None
533
+
534
+ content = response_200.get("content", {})
535
+ json_content = content.get("application/json")
536
+ if not json_content:
537
+ return None
538
+
539
+ schema = json_content.get("schema")
540
+
541
+ # If the schema itself is a $ref, resolve it once to get to the actual schema
542
+ # This is needed for cases like: schema: { $ref: "#/components/schemas/UsersResponse" }
543
+ # We want to check the properties inside UsersResponse
544
+ if schema and "$ref" in schema:
545
+ ref_path = schema["$ref"]
546
+ # Parse the reference (e.g., "#/components/schemas/UsersResponse")
547
+ if ref_path.startswith("#/"):
548
+ parts = ref_path[2:].split("/")
549
+ resolved = raw_spec
550
+ for part in parts:
551
+ resolved = resolved.get(part, {})
552
+ if not resolved:
553
+ return None
554
+ return resolved
555
+
556
+ return schema
557
+ except Exception:
558
+ return None
559
+
560
+
561
def validate_meta_extractor_fields(
    endpoint: EndpointDefinition,
    cassette_path: Path,
    entity_name: str,
    action: str,
) -> Tuple[bool, List[str], List[str]]:
    """
    Validate x-airbyte-meta-extractor fields exist in cassettes and schema.

    Checks that:
    1. Fields extracted by meta-extractor exist in actual cassette responses
    2. Those fields are declared in the response schema

    Args:
        endpoint: Endpoint definition with meta_extractor and response_schema
        cassette_path: Path to cassette file
        entity_name: Entity name (for error messages)
        action: Action name (for error messages)

    Returns:
        Tuple of (is_valid, errors, warnings)
        - is_valid: Always True (missing fields are warnings, not errors)
        - errors: Empty (meta-extractor issues are non-blocking)
        - warnings: List of warning messages
    """
    errors: List[str] = []
    warnings: List[str] = []

    # No meta-extractor declared: nothing to check.
    if not endpoint.meta_extractor:
        return True, errors, warnings

    # Outer try: failures loading the cassette (or iterating fields) produce a
    # single summary warning rather than aborting readiness validation.
    try:
        # Load cassette to get actual response
        spec = load_test_spec(cassette_path, auth_config={})
        response_body = spec.captured_response.body

        # Validate each meta extractor field
        for field_name, extractor_expr in endpoint.meta_extractor.items():
            # Skip header-based extractors - they extract from headers, not response body
            # @link.next extracts from RFC 5988 Link header
            # @header.X-Name extracts raw header value
            if extractor_expr.startswith("@link.") or extractor_expr.startswith("@header."):
                continue

            # Check 1: Does the JSONPath find data in the actual response?
            # Inner try: a bad JSONPath on one field must not stop Check 2 or
            # the remaining fields from being validated.
            try:
                parsed_expr = parse_jsonpath(extractor_expr)
                matches = [match.value for match in parsed_expr.find(response_body)]

                if not matches:
                    warnings.append(
                        f"{entity_name}.{action}: x-airbyte-meta-extractor field '{field_name}' "
                        f"with JSONPath '{extractor_expr}' found no matches in cassette response"
                    )
            except Exception as e:
                warnings.append(
                    f"{entity_name}.{action}: x-airbyte-meta-extractor field '{field_name}' has invalid JSONPath '{extractor_expr}': {str(e)}"
                )

            # Check 2: Is this field path declared in the response schema?
            if endpoint.response_schema:
                field_in_schema = _check_field_in_schema(extractor_expr, endpoint.response_schema)

                if not field_in_schema:
                    warnings.append(
                        f"{entity_name}.{action}: x-airbyte-meta-extractor field '{field_name}' "
                        f"extracts from '{extractor_expr}' but this path is not declared in response schema"
                    )

    except Exception as e:
        warnings.append(f"{entity_name}.{action}: Error validating x-airbyte-meta-extractor: {str(e)}")

    # Always valid: meta-extractor findings are advisory warnings only.
    return True, errors, warnings
634
+
635
+
636
+ def validate_connector_readiness(connector_dir: str | Path) -> Dict[str, Any]:
637
+ """
638
+ Validate that a connector is ready to ship.
639
+
640
+ Checks that:
641
+ - connector.yaml exists and is valid
642
+ - For each entity/action defined, corresponding cassette(s) exist
643
+ - Response schemas in connector.yaml match cassette responses
644
+ - Detects fields in responses that are not declared in the schema (as warnings)
645
+
646
+ Args:
647
+ connector_dir: Path to the connector directory (e.g., "/path/to/integrations/stripe")
648
+
649
+ Returns:
650
+ Dict with validation results including:
651
+ - success: Overall success status
652
+ - connector_name: Name of the connector
653
+ - validation_results: List of results for each entity/action
654
+ - summary: Summary statistics
655
+
656
+ Each validation result includes:
657
+ - warnings: Human-readable warnings (e.g., "Undeclared field in response: data[].extra_field")
658
+ - errors: Actual schema validation errors (e.g., missing required fields, type mismatches)
659
+
660
+ Note: Undeclared fields are surfaced as warnings, not errors. This allows connectors
661
+ with dynamic/flexible schemas (like custom objects) to pass validation while still
662
+ highlighting fields that could be added to the schema. Non-dynamic schemas are expected
663
+ to have all fields in the schema.
664
+
665
+ Example:
666
+ validate_connector_readiness("/path/to/integrations/stripe")
667
+ """
668
+ connector_path = Path(connector_dir)
669
+
670
+ if not connector_path.exists():
671
+ return {
672
+ "success": False,
673
+ "error": f"Connector directory not found: {connector_dir}",
674
+ }
675
+
676
+ config_file = connector_path / "connector.yaml"
677
+ if not config_file.exists():
678
+ return {
679
+ "success": False,
680
+ "error": f"connector.yaml not found in {connector_dir}",
681
+ }
682
+
683
+ try:
684
+ config = load_connector_model(config_file)
685
+ except ConnectorModelLoaderError as e:
686
+ return {"success": False, "error": f"Failed to load connector.yaml: {str(e)}"}
687
+
688
+ # Load the raw spec for extractor validation
689
+ try:
690
+ with open(config_file) as f:
691
+ raw_spec = yaml.safe_load(f)
692
+ except Exception:
693
+ raw_spec = {}
694
+
695
+ cassettes_dir = connector_path / "tests" / "cassettes"
696
+ cassette_map = build_cassette_map(cassettes_dir)
697
+
698
+ # Validate auth scheme coverage for multi-auth connectors
699
+ auth_valid, auth_errors, auth_warnings, auth_covered_schemes, auth_unmatched_cassettes = validate_auth_scheme_coverage(config, cassettes_dir)
700
+
701
+ validation_results = []
702
+ total_operations = 0
703
+ operations_with_cassettes = 0
704
+ operations_missing_cassettes = 0
705
+ total_cassettes = 0
706
+ cassettes_valid = 0
707
+ cassettes_invalid = 0
708
+ total_warnings = 0
709
+ total_errors = 0
710
+
711
+ for entity in config.entities:
712
+ for action in entity.actions:
713
+ total_operations += 1
714
+
715
+ key = (entity.name, action.value)
716
+ cassette_paths = cassette_map.get(key, [])
717
+
718
+ endpoint = entity.endpoints[action]
719
+ # Check if this is a download action
720
+ is_download = action == Action.DOWNLOAD
721
+ # Check if operation is marked as untested
722
+ is_untested = endpoint.untested
723
+
724
+ if not cassette_paths:
725
+ # For untested operations, add a warning instead of an error
726
+ if is_untested:
727
+ total_warnings += 1
728
+ validation_results.append(
729
+ {
730
+ "entity": entity.name,
731
+ "action": action.value,
732
+ "cassettes_found": 0,
733
+ "cassette_paths": [],
734
+ "schema_defined": endpoint.response_schema is not None,
735
+ "is_download": is_download,
736
+ "untested": True,
737
+ "schema_validation": [],
738
+ "warnings": [
739
+ f"Operation {entity.name}.{action.value} is marked as untested "
740
+ f"(x-airbyte-untested: true) and has no cassettes. Validation skipped."
741
+ ],
742
+ }
743
+ )
744
+ continue
745
+
746
+ # For tested operations, this is an error
747
+ operations_missing_cassettes += 1
748
+ validation_results.append(
749
+ {
750
+ "entity": entity.name,
751
+ "action": action.value,
752
+ "cassettes_found": 0,
753
+ "cassette_paths": [],
754
+ "schema_defined": endpoint.response_schema is not None,
755
+ "is_download": is_download,
756
+ "schema_validation": [],
757
+ }
758
+ )
759
+ continue
760
+
761
+ operations_with_cassettes += 1
762
+ total_cassettes += len(cassette_paths)
763
+
764
+ response_schema = endpoint.response_schema
765
+ schema_defined = response_schema is not None
766
+
767
+ # Validate x-airbyte-record-extractor (once per endpoint, not per cassette)
768
+ if endpoint.record_extractor and not is_download:
769
+ is_valid, extractor_errors, extractor_warnings = validate_record_extractor_has_ref(
770
+ endpoint=endpoint,
771
+ raw_spec=raw_spec,
772
+ entity_name=entity.name,
773
+ action=action.value,
774
+ )
775
+
776
+ if not is_valid:
777
+ cassettes_invalid += len(cassette_paths)
778
+ total_errors += len(extractor_errors) * len(cassette_paths)
779
+
780
+ total_warnings += len(extractor_warnings)
781
+
782
+ # If record extractor validation fails, add error to all cassettes for this endpoint
783
+ if not is_valid:
784
+ schema_validation = [
785
+ {
786
+ "cassette": str(p.name),
787
+ "valid": False,
788
+ "errors": extractor_errors,
789
+ "warnings": extractor_warnings,
790
+ }
791
+ for p in cassette_paths
792
+ ]
793
+ validation_results.append(
794
+ {
795
+ "entity": entity.name,
796
+ "action": action.value,
797
+ "cassettes_found": len(cassette_paths),
798
+ "cassette_paths": [str(p.name) for p in cassette_paths],
799
+ "schema_defined": schema_defined,
800
+ "is_download": is_download,
801
+ "schema_validation": schema_validation,
802
+ }
803
+ )
804
+ continue
805
+
806
+ schema_validation = []
807
+ for cassette_path in cassette_paths:
808
+ try:
809
+ spec = load_test_spec(cassette_path, auth_config={})
810
+
811
+ # For download actions, validate that captured_file_request/response exist
812
+ if is_download:
813
+ has_file_request = hasattr(spec, "captured_file_request") and spec.captured_file_request is not None
814
+ has_file_response = hasattr(spec, "captured_file_response") and spec.captured_file_response is not None
815
+
816
+ if has_file_request and has_file_response:
817
+ cassettes_valid += 1
818
+ schema_validation.append(
819
+ {
820
+ "cassette": str(cassette_path.name),
821
+ "valid": True,
822
+ "errors": [],
823
+ "warnings": [],
824
+ }
825
+ )
826
+ else:
827
+ cassettes_invalid += 1
828
+ total_errors += 1
829
+ errors = []
830
+ if not has_file_request:
831
+ errors.append("Missing captured_file_request for download action")
832
+ elif not has_file_response:
833
+ errors.append("Missing captured_file_response for download action")
834
+ schema_validation.append(
835
+ {
836
+ "cassette": str(cassette_path.name),
837
+ "valid": False,
838
+ "errors": errors,
839
+ "warnings": [],
840
+ }
841
+ )
842
+ continue
843
+
844
+ # For non-download actions, validate response schema
845
+ response_body = spec.captured_response.body
846
+
847
+ if response_schema:
848
+ is_valid, errors = validate_response_against_schema(response_body, response_schema)
849
+
850
+ undeclared_fields = find_undeclared_fields(response_body, response_schema)
851
+
852
+ warnings = []
853
+ if undeclared_fields:
854
+ warnings = [f"Undeclared field in response: {field}" for field in undeclared_fields]
855
+
856
+ # Validate x-airbyte-meta-extractor fields
857
+ if endpoint.meta_extractor:
858
+ (
859
+ _,
860
+ meta_errors,
861
+ meta_warnings,
862
+ ) = validate_meta_extractor_fields(
863
+ endpoint=endpoint,
864
+ cassette_path=cassette_path,
865
+ entity_name=entity.name,
866
+ action=action.value,
867
+ )
868
+ warnings.extend(meta_warnings)
869
+ # Meta extractor errors are also treated as warnings (non-blocking)
870
+ warnings.extend(meta_errors)
871
+
872
+ if is_valid:
873
+ cassettes_valid += 1
874
+ else:
875
+ cassettes_invalid += 1
876
+
877
+ total_warnings += len(warnings)
878
+ total_errors += len(errors)
879
+
880
+ schema_validation.append(
881
+ {
882
+ "cassette": str(cassette_path.name),
883
+ "valid": is_valid,
884
+ "errors": errors,
885
+ "warnings": warnings,
886
+ }
887
+ )
888
+ else:
889
+ total_errors += 1
890
+ schema_validation.append(
891
+ {
892
+ "cassette": str(cassette_path.name),
893
+ "valid": None,
894
+ "errors": ["No response schema defined in connector.yaml"],
895
+ "warnings": [],
896
+ }
897
+ )
898
+
899
+ except Exception as e:
900
+ cassettes_invalid += 1
901
+ total_errors += 1
902
+ schema_validation.append(
903
+ {
904
+ "cassette": str(cassette_path.name),
905
+ "valid": False,
906
+ "errors": [f"Failed to load/validate cassette: {str(e)}"],
907
+ "warnings": [],
908
+ }
909
+ )
910
+
911
+ validation_results.append(
912
+ {
913
+ "entity": entity.name,
914
+ "action": action.value,
915
+ "cassettes_found": len(cassette_paths),
916
+ "cassette_paths": [str(p.name) for p in cassette_paths],
917
+ "schema_defined": schema_defined,
918
+ "is_download": is_download,
919
+ "schema_validation": schema_validation,
920
+ }
921
+ )
922
+
923
+ # Validate replication compatibility with Airbyte
924
+ replication_result = validate_replication_compatibility(
925
+ connector_yaml_path=config_file,
926
+ raw_spec=raw_spec,
927
+ )
928
+
929
+ # Merge replication errors/warnings into totals
930
+ # Note: If connector is not in registry, we don't count warnings since this is expected for test connectors
931
+ replication_errors = replication_result.get("errors", [])
932
+ replication_warnings = replication_result.get("warnings", [])
933
+ total_errors += len(replication_errors)
934
+
935
+ # Only count replication warnings if the connector was found in the registry
936
+ # (i.e., there are actual validation issues, not just "not found in registry")
937
+ if replication_result.get("registry_found", False):
938
+ total_warnings += len(replication_warnings)
939
+
940
+ # Merge auth scheme validation errors/warnings into totals
941
+ total_errors += len(auth_errors)
942
+ total_warnings += len(auth_warnings)
943
+
944
+ # Update success criteria to include replication and auth scheme validation
945
+ success = operations_missing_cassettes == 0 and cassettes_invalid == 0 and total_operations > 0 and len(replication_errors) == 0 and auth_valid
946
+
947
+ # Check for preferred_for_check on at least one list operation
948
+ has_preferred_check = False
949
+ for entity in config.entities:
950
+ for action_val in entity.actions:
951
+ endpoint = entity.endpoints.get(action_val)
952
+ if endpoint and getattr(endpoint, "preferred_for_check", False):
953
+ has_preferred_check = True
954
+ break
955
+ if has_preferred_check:
956
+ break
957
+
958
+ readiness_warnings = []
959
+ if not has_preferred_check:
960
+ readiness_warnings.append(
961
+ "No operation has x-airbyte-preferred-for-check: true. "
962
+ "Add this extension to a lightweight list operation (e.g., users.list) "
963
+ "to enable reliable health checks."
964
+ )
965
+
966
+ # Build auth scheme validation result
967
+ options = config.auth.options or []
968
+ tested_schemes = [opt.scheme_name for opt in options if not opt.untested]
969
+ untested_schemes_list = [opt.scheme_name for opt in options if opt.untested]
970
+ missing_tested = [s for s in tested_schemes if s not in auth_covered_schemes]
971
+
972
+ return {
973
+ "success": success,
974
+ "connector_name": config.name,
975
+ "connector_path": str(connector_path),
976
+ "validation_results": validation_results,
977
+ "replication_validation": replication_result,
978
+ "auth_scheme_validation": {
979
+ "valid": auth_valid,
980
+ "errors": auth_errors,
981
+ "warnings": auth_warnings,
982
+ "covered_schemes": auth_covered_schemes,
983
+ "missing_schemes": missing_tested,
984
+ "untested_schemes": untested_schemes_list,
985
+ },
986
+ "readiness_warnings": readiness_warnings,
987
+ "summary": {
988
+ "total_operations": total_operations,
989
+ "operations_with_cassettes": operations_with_cassettes,
990
+ "operations_missing_cassettes": operations_missing_cassettes,
991
+ "total_cassettes": total_cassettes,
992
+ "cassettes_valid": cassettes_valid,
993
+ "cassettes_invalid": cassettes_invalid,
994
+ "total_warnings": total_warnings,
995
+ "total_errors": total_errors,
996
+ },
997
+ }