airbyte-agent-stripe 0.5.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-agent-stripe might be problematic. Click here for more details.

Files changed (55) hide show
  1. airbyte_agent_stripe/__init__.py +237 -0
  2. airbyte_agent_stripe/_vendored/__init__.py +1 -0
  3. airbyte_agent_stripe/_vendored/connector_sdk/__init__.py +82 -0
  4. airbyte_agent_stripe/_vendored/connector_sdk/auth_strategies.py +1123 -0
  5. airbyte_agent_stripe/_vendored/connector_sdk/auth_template.py +135 -0
  6. airbyte_agent_stripe/_vendored/connector_sdk/cloud_utils/__init__.py +5 -0
  7. airbyte_agent_stripe/_vendored/connector_sdk/cloud_utils/client.py +213 -0
  8. airbyte_agent_stripe/_vendored/connector_sdk/connector_model_loader.py +957 -0
  9. airbyte_agent_stripe/_vendored/connector_sdk/constants.py +78 -0
  10. airbyte_agent_stripe/_vendored/connector_sdk/exceptions.py +23 -0
  11. airbyte_agent_stripe/_vendored/connector_sdk/executor/__init__.py +31 -0
  12. airbyte_agent_stripe/_vendored/connector_sdk/executor/hosted_executor.py +197 -0
  13. airbyte_agent_stripe/_vendored/connector_sdk/executor/local_executor.py +1504 -0
  14. airbyte_agent_stripe/_vendored/connector_sdk/executor/models.py +190 -0
  15. airbyte_agent_stripe/_vendored/connector_sdk/extensions.py +655 -0
  16. airbyte_agent_stripe/_vendored/connector_sdk/http/__init__.py +37 -0
  17. airbyte_agent_stripe/_vendored/connector_sdk/http/adapters/__init__.py +9 -0
  18. airbyte_agent_stripe/_vendored/connector_sdk/http/adapters/httpx_adapter.py +251 -0
  19. airbyte_agent_stripe/_vendored/connector_sdk/http/config.py +98 -0
  20. airbyte_agent_stripe/_vendored/connector_sdk/http/exceptions.py +119 -0
  21. airbyte_agent_stripe/_vendored/connector_sdk/http/protocols.py +114 -0
  22. airbyte_agent_stripe/_vendored/connector_sdk/http/response.py +102 -0
  23. airbyte_agent_stripe/_vendored/connector_sdk/http_client.py +686 -0
  24. airbyte_agent_stripe/_vendored/connector_sdk/logging/__init__.py +11 -0
  25. airbyte_agent_stripe/_vendored/connector_sdk/logging/logger.py +264 -0
  26. airbyte_agent_stripe/_vendored/connector_sdk/logging/types.py +92 -0
  27. airbyte_agent_stripe/_vendored/connector_sdk/observability/__init__.py +11 -0
  28. airbyte_agent_stripe/_vendored/connector_sdk/observability/models.py +19 -0
  29. airbyte_agent_stripe/_vendored/connector_sdk/observability/redactor.py +81 -0
  30. airbyte_agent_stripe/_vendored/connector_sdk/observability/session.py +94 -0
  31. airbyte_agent_stripe/_vendored/connector_sdk/performance/__init__.py +6 -0
  32. airbyte_agent_stripe/_vendored/connector_sdk/performance/instrumentation.py +57 -0
  33. airbyte_agent_stripe/_vendored/connector_sdk/performance/metrics.py +93 -0
  34. airbyte_agent_stripe/_vendored/connector_sdk/schema/__init__.py +75 -0
  35. airbyte_agent_stripe/_vendored/connector_sdk/schema/base.py +161 -0
  36. airbyte_agent_stripe/_vendored/connector_sdk/schema/components.py +238 -0
  37. airbyte_agent_stripe/_vendored/connector_sdk/schema/connector.py +131 -0
  38. airbyte_agent_stripe/_vendored/connector_sdk/schema/extensions.py +109 -0
  39. airbyte_agent_stripe/_vendored/connector_sdk/schema/operations.py +146 -0
  40. airbyte_agent_stripe/_vendored/connector_sdk/schema/security.py +213 -0
  41. airbyte_agent_stripe/_vendored/connector_sdk/secrets.py +182 -0
  42. airbyte_agent_stripe/_vendored/connector_sdk/telemetry/__init__.py +10 -0
  43. airbyte_agent_stripe/_vendored/connector_sdk/telemetry/config.py +32 -0
  44. airbyte_agent_stripe/_vendored/connector_sdk/telemetry/events.py +58 -0
  45. airbyte_agent_stripe/_vendored/connector_sdk/telemetry/tracker.py +151 -0
  46. airbyte_agent_stripe/_vendored/connector_sdk/types.py +241 -0
  47. airbyte_agent_stripe/_vendored/connector_sdk/utils.py +60 -0
  48. airbyte_agent_stripe/_vendored/connector_sdk/validation.py +822 -0
  49. airbyte_agent_stripe/connector.py +1579 -0
  50. airbyte_agent_stripe/connector_model.py +14869 -0
  51. airbyte_agent_stripe/models.py +2353 -0
  52. airbyte_agent_stripe/types.py +295 -0
  53. airbyte_agent_stripe-0.5.25.dist-info/METADATA +110 -0
  54. airbyte_agent_stripe-0.5.25.dist-info/RECORD +55 -0
  55. airbyte_agent_stripe-0.5.25.dist-info/WHEEL +4 -0
@@ -0,0 +1,822 @@
1
+ """
2
+ Validation tools for connector readiness and schema compliance.
3
+
4
+ These tools help ensure that connectors are ready to ship by:
5
+ - Checking that all entity/action operations have corresponding test cassettes
6
+ - Validating that response schemas match the actual cassette responses
7
+ - Detecting fields present in responses but not declared in schemas
8
+ """
9
+
10
+ from collections import defaultdict
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Tuple
13
+
14
+ import jsonschema
15
+ import yaml
16
+ from jsonpath_ng import parse as parse_jsonpath
17
+
18
+ from .connector_model_loader import (
19
+ ConnectorModelLoaderError,
20
+ load_connector_model,
21
+ )
22
+ from .testing.spec_loader import load_test_spec
23
+ from .types import Action, EndpointDefinition
24
+
25
+
26
def build_cassette_map(cassettes_dir: Path) -> Dict[Tuple[str, str], List[Path]]:
    """Group cassette files by the operation they exercise.

    The (entity, action) key comes from the ``entity`` and ``action`` attributes
    of the test spec loaded from each file; the filename is never inspected.

    Args:
        cassettes_dir: Directory that holds the cassette ``*.yaml`` files.

    Returns:
        A plain dict mapping ``(entity, action)`` to the list of cassette paths
        found for that operation. Empty when the directory is missing or is not
        a directory.
    """
    if not (cassettes_dir.exists() and cassettes_dir.is_dir()):
        return {}

    grouped: Dict[Tuple[str, str], List[Path]] = {}

    for yaml_path in cassettes_dir.glob("*.yaml"):
        try:
            test_spec = load_test_spec(yaml_path, auth_config={})
            operation = (test_spec.entity, test_spec.action)
            grouped.setdefault(operation, []).append(yaml_path)
        except Exception:
            # Best effort: a file that cannot be loaded as a test spec is
            # simply left out of the map.
            continue

    return grouped
52
+
53
+
54
def validate_response_against_schema(response_body: Any, schema: Dict[str, Any]) -> Tuple[bool, List[str]]:
    """Check a response body against a JSON schema.

    Args:
        response_body: The response body to validate (usually a dict or list).
        schema: JSON schema to validate against. A falsy schema (``None`` or
            ``{}``) is treated as "nothing to check".

    Returns:
        ``(is_valid, messages)``. ``messages`` is empty on success and holds a
        single message describing the failure otherwise.
    """
    # Without a schema there are no constraints to violate.
    if not schema:
        return True, []

    try:
        jsonschema.validate(instance=response_body, schema=schema)
    except jsonschema.ValidationError as violation:
        # Render the location of the offending value as a dotted path.
        location = ".".join(str(segment) for segment in violation.path)
        return False, [f"{violation.message} at path: {location}"]
    except jsonschema.SchemaError as bad_schema:
        return False, [f"Invalid schema: {bad_schema.message}"]
    except Exception as unexpected:
        return False, [f"Validation error: {str(unexpected)}"]

    return True, []
77
+
78
+
79
def find_undeclared_fields(response_body: Any, schema: Dict[str, Any], path: str = "") -> List[str]:
    """Find fields present in a response but not declared in its schema.

    The walk follows the schema and the body in lockstep:

    - Object schemas: every key of the body that is missing from
      ``properties`` is reported, unless ``additionalProperties`` is a
      non-empty schema, in which case the value is checked against that
      schema instead. Declared keys are checked recursively.
    - Array schemas: only the first element of the body is inspected and its
      path is written with ``[]`` so that a field is reported once, not once
      per element.
    - ``anyOf`` / ``oneOf`` / ``allOf`` (the first one present, in that
      order): a field is reported only if every sub-schema leaves it
      undeclared.

    Args:
        response_body: The response body to check.
        schema: JSON schema to check against. Falsy or non-dict schemas
            (including the boolean schemas ``True``/``False``) declare nothing
            to compare against and yield no findings.
        path: Current path in the object (used by the recursive calls).

    Returns:
        List of paths to undeclared fields with array indices normalized
        (e.g. ``["data.items[].extra_field"]``). Findings that come from a
        union keyword are sorted so the result is deterministic.
    """
    # Boolean schemas are valid JSON Schema but have no ``properties`` to
    # inspect; treating them like an empty schema avoids calling ``.get`` on a
    # bool.
    if not schema or not isinstance(schema, dict):
        return []

    # ``type`` may be a single name or a list of names (e.g. ["object", "null"]).
    declared_type = schema.get("type")
    allowed_types = declared_type if isinstance(declared_type, list) else [declared_type]

    undeclared_fields: List[str] = []

    if isinstance(response_body, dict) and "object" in allowed_types:
        schema_properties = schema.get("properties") or {}
        additional_properties = schema.get("additionalProperties", True)

        for key, value in response_body.items():
            field_path = f"{path}.{key}" if path else key

            if key in schema_properties:
                undeclared_fields.extend(find_undeclared_fields(value, schema_properties[key], field_path))
            elif isinstance(additional_properties, bool) or additional_properties == {}:
                # Whether extras are allowed or forbidden, the key itself is
                # not declared anywhere, so it is reported either way.
                undeclared_fields.append(field_path)
            elif isinstance(additional_properties, dict):
                # A real schema for extra keys: the key is covered by it, so
                # only look for undeclared fields inside the value.
                undeclared_fields.extend(find_undeclared_fields(value, additional_properties, field_path))

    elif isinstance(response_body, list) and "array" in allowed_types:
        if response_body:
            items_schema = schema.get("items", {})
            undeclared_fields.extend(find_undeclared_fields(response_body[0], items_schema, f"{path}[]"))

    else:
        union_key = next((k for k in ("anyOf", "oneOf", "allOf") if k in schema), None)
        if union_key is not None:
            per_branch = [set(find_undeclared_fields(response_body, sub_schema, path)) for sub_schema in schema[union_key]]
            if per_branch:
                # Sorted: set iteration order is not stable across runs.
                undeclared_fields.extend(sorted(set.intersection(*per_branch)))

    return undeclared_fields
136
+
137
+
138
+ def _extract_field_from_jsonpath(jsonpath_expr: str) -> str | None:
139
+ """Extract the full field path from a JSONPath expression.
140
+
141
+ Examples:
142
+ $.users -> "users"
143
+ $.data -> "data"
144
+ $.data.items -> "data.items" (returns full path)
145
+ $.data.repository -> "data.repository" (returns full path)
146
+ $.calls[0] -> "calls"
147
+
148
+ Args:
149
+ jsonpath_expr: JSONPath expression (e.g., "$.users" or "$.data.repository")
150
+
151
+ Returns:
152
+ Full field path or None if cannot parse
153
+ """
154
+ # Remove leading $. or $
155
+ expr = jsonpath_expr.strip()
156
+ if expr.startswith("$."):
157
+ expr = expr[2:]
158
+ elif expr.startswith("$"):
159
+ expr = expr[1:]
160
+
161
+ # Remove array indices and wildcards
162
+ expr = expr.replace("[0]", "").replace("[]", "").replace("[*]", "")
163
+
164
+ # Return the full path (not just the first segment)
165
+ return expr if expr else None
166
+
167
+
168
def _is_dynamic_object(schema: Dict[str, Any]) -> bool:
    """Tell whether a schema is an object with no declared properties.

    Such "dynamic" objects are how flexible shapes (custom objects, arbitrary
    key-value stores) are expressed: ``type: object`` with ``properties``
    missing or empty.

    Args:
        schema: Schema to check. Anything that is not a dict is never dynamic.

    Returns:
        True if ``schema`` is a dict with ``type == "object"`` and no
        (or empty) ``properties``; False otherwise.
    """
    if isinstance(schema, dict) and schema.get("type") == "object":
        # Missing and empty ``properties`` are treated alike.
        return not schema.get("properties", {})
    return False
192
+
193
+
194
def _schema_has_ref_or_dynamic(field_schema: Dict[str, Any]) -> bool:
    """Tell whether a field schema is backed by a ``$ref`` or is a dynamic object.

    The field qualifies when it carries a ``$ref`` itself, when it is a dynamic
    object (see ``_is_dynamic_object``), or when it is an array whose ``items``
    schema satisfies either of those two conditions.

    Args:
        field_schema: Schema to check.

    Returns:
        True if the field (or, for arrays, its items) uses ``$ref`` or is a
        dynamic object; False otherwise.
    """
    # The field itself is a reference or a free-form object.
    if "$ref" in field_schema or _is_dynamic_object(field_schema):
        return True

    # Anything that is not an array with an ``items`` schema has nowhere else
    # to look.
    if field_schema.get("type") != "array" or "items" not in field_schema:
        return False

    element_schema = field_schema["items"]
    return "$ref" in element_schema or _is_dynamic_object(element_schema)
220
+
221
+
222
def _check_field_has_ref_or_dynamic(field_path: str, schema: Dict[str, Any]) -> bool:
    """Locate a field in an unresolved schema and test it for ``$ref``/dynamic typing.

    A single-segment path (e.g. "users") is looked up directly in the root
    ``properties``. A dotted path (e.g. "data.items") is walked segment by
    segment through ``properties``, stepping into ``items`` whenever the
    current schema is an array. The schema found at the end is then judged by
    ``_schema_has_ref_or_dynamic``.

    Dynamic objects (inline objects with no declared properties) are accepted
    because they represent intentionally flexible schemas.

    Args:
        field_path: Field path (e.g., "users" or "data.items").
        schema: Response schema (unresolved, so that ``$ref`` is still visible).

    Returns:
        True if the field uses ``$ref``, is a dynamic object, or is an array
        whose items do; False if it does not or the path cannot be followed.
    """
    if not (schema and isinstance(schema, dict)):
        return False

    segments = field_path.split(".")

    # Single segment: one lookup in the root properties, no array handling.
    if len(segments) == 1:
        if "properties" not in schema:
            return False
        return _schema_has_ref_or_dynamic(schema["properties"].get(field_path, {}))

    # Dotted path: descend one segment at a time.
    node = schema
    for segment in segments:
        if not isinstance(node, dict):
            return False

        # Properties of an array live on its element schema.
        if node.get("type") == "array" and "items" in node:
            node = node["items"]
            if not isinstance(node, dict):
                return False

        if "properties" not in node:
            return False
        # A missing segment becomes {} and fails on the next step or in the
        # final check.
        node = node["properties"].get(segment, {})

    return _schema_has_ref_or_dynamic(node)
269
+
270
+
271
def _check_field_in_schema(jsonpath_expr: str, schema: Dict[str, Any]) -> bool:
    """Check if a JSONPath expression corresponds to fields declared in a schema.

    The JSONPath is reduced to a dotted field path with
    ``_extract_field_from_jsonpath`` and then followed through the schema's
    ``properties``. Array schemas are traversed through their ``items`` schema,
    because array selectors are stripped from the path (``$.data[0].cursor``
    becomes ``data.cursor``).

    Args:
        jsonpath_expr: JSONPath (e.g., "$.records", "$.pagination.cursor")
        schema: Response schema

    Returns:
        True if every segment is declared, or if the walk reaches a schema
        whose ``additionalProperties`` permits undeclared keys (nothing further
        can be verified there). False otherwise, including when the path is
        empty, the schema is falsy, or ``additionalProperties`` is ``False``.
    """
    field_path = _extract_field_from_jsonpath(jsonpath_expr)

    if not field_path or not schema:
        return False

    current = schema

    for part in field_path.split("."):
        if not isinstance(current, dict):
            return False

        # Properties of array elements are declared on the ``items`` schema.
        if current.get("type") == "array" and isinstance(current.get("items"), dict):
            current = current["items"]

        properties = current.get("properties")
        if isinstance(properties, dict) and part in properties:
            current = properties[part]
            continue

        # An explicit ``additionalProperties`` other than ``False`` marks a
        # dynamic schema: the key may legitimately exist, so accept the path.
        # ``False`` forbids undeclared keys, so the segment is really missing.
        return current.get("additionalProperties", False) is not False

    return True
307
+
308
+
309
def validate_record_extractor_has_ref(
    endpoint: EndpointDefinition,
    raw_spec: Dict[str, Any],
    entity_name: str,
    action: str,
) -> Tuple[bool, List[str], List[str]]:
    """
    Check that x-airbyte-record-extractor targets a ``$ref`` or dynamic-object field.

    When an operation defines x-airbyte-record-extractor, its JSONPath should
    land on a response-schema field that either:
    1. Uses a $ref, so records are typed by a named schema, OR
    2. Is a dynamic object (type: object with no declared properties), used for
       intentionally flexible shapes such as custom objects.

    Args:
        endpoint: The endpoint definition; ``record_extractor``,
            ``response_schema``, ``path`` and ``method`` are read from it.
        raw_spec: Raw OpenAPI spec dict (unresolved) to check for $refs
        entity_name: Entity name (for messages)
        action: Action name (for messages)

    Returns:
        Tuple of (is_valid, errors, warnings)
        - is_valid: False only when the extractor points at a field that is
          neither a $ref nor a dynamic object
        - errors: List of error messages
        - warnings: List of warning messages (missing response schema,
          unparseable JSONPath, unexpected failure during the check)

    Example:
        Good: x-airbyte-record-extractor: $.users
              Response schema has: users: { $ref: "#/components/schemas/User" }

        Good: x-airbyte-record-extractor: $.custom_fields
              Response schema has: custom_fields: { type: object, additionalProperties: true }
              (no properties declared = dynamic schema)

        Bad:  x-airbyte-record-extractor: $.data
              Response schema has: data: { type: object, properties: { id: {...}, name: {...} } }
              (inline schema with declared properties)
    """
    errors: List[str] = []
    warnings: List[str] = []

    extractor_path = endpoint.record_extractor
    if not extractor_path:
        # Nothing to validate.
        return True, errors, warnings

    if not endpoint.response_schema:
        warnings.append(f"{entity_name}.{action}: Has x-airbyte-record-extractor but no response schema defined")
        return True, errors, warnings

    is_valid = True
    try:
        # e.g. "$.users" -> "users"
        target_field = _extract_field_from_jsonpath(extractor_path)

        if not target_field:
            warnings.append(f"{entity_name}.{action}: Cannot parse x-airbyte-record-extractor JSONPath: {extractor_path}")
        else:
            # The raw spec is consulted because $ref is only visible before
            # resolution. If the schema cannot be located the check is skipped.
            unresolved_schema = _find_unresolved_response_schema(raw_spec, endpoint.path, endpoint.method)

            if unresolved_schema and not _check_field_has_ref_or_dynamic(target_field, unresolved_schema):
                errors.append(
                    f"{entity_name}.{action}: x-airbyte-record-extractor '{extractor_path}' points to field "
                    f"'{target_field}' which uses an inline schema with declared properties. Records should "
                    f"reference a named schema (e.g., $ref: '#/components/schemas/{target_field.title()}') for proper typing."
                )
                is_valid = False
    except Exception as exc:
        # Failures of the check itself are reported but never block.
        warnings.append(f"{entity_name}.{action}: Error validating x-airbyte-record-extractor: {str(exc)}")
        return True, errors, warnings

    return is_valid, errors, warnings
394
+
395
+
396
+ def _find_unresolved_response_schema(raw_spec: Dict[str, Any], path: str, method: str) -> Dict[str, Any] | None:
397
+ """Find the unresolved response schema from the raw OpenAPI spec.
398
+
399
+ If the response schema itself is a $ref, resolves it once to get the actual schema.
400
+ This allows us to check the properties within that schema for nested $refs.
401
+
402
+ Args:
403
+ raw_spec: Raw OpenAPI spec dict
404
+ path: API path (e.g., "/v1/users")
405
+ method: HTTP method (e.g., "GET")
406
+
407
+ Returns:
408
+ Unresolved response schema dict or None if not found
409
+ """
410
+ try:
411
+ paths = raw_spec.get("paths", {})
412
+ if path not in paths:
413
+ return None
414
+
415
+ path_item = paths[path]
416
+ operation = path_item.get(method.lower())
417
+ if not operation:
418
+ return None
419
+
420
+ responses = operation.get("responses", {})
421
+ response_200 = responses.get("200")
422
+ if not response_200:
423
+ return None
424
+
425
+ content = response_200.get("content", {})
426
+ json_content = content.get("application/json")
427
+ if not json_content:
428
+ return None
429
+
430
+ schema = json_content.get("schema")
431
+
432
+ # If the schema itself is a $ref, resolve it once to get to the actual schema
433
+ # This is needed for cases like: schema: { $ref: "#/components/schemas/UsersResponse" }
434
+ # We want to check the properties inside UsersResponse
435
+ if schema and "$ref" in schema:
436
+ ref_path = schema["$ref"]
437
+ # Parse the reference (e.g., "#/components/schemas/UsersResponse")
438
+ if ref_path.startswith("#/"):
439
+ parts = ref_path[2:].split("/")
440
+ resolved = raw_spec
441
+ for part in parts:
442
+ resolved = resolved.get(part, {})
443
+ if not resolved:
444
+ return None
445
+ return resolved
446
+
447
+ return schema
448
+ except Exception:
449
+ return None
450
+
451
+
452
def validate_meta_extractor_fields(
    endpoint: EndpointDefinition,
    cassette_path: Path,
    entity_name: str,
    action: str,
) -> Tuple[bool, List[str], List[str]]:
    """
    Cross-check x-airbyte-meta-extractor entries against a cassette and the schema.

    For every ``name -> JSONPath`` entry of ``endpoint.meta_extractor``:
    1. The JSONPath is evaluated against the cassette's captured response body;
       no match, or a JSONPath that fails to parse/evaluate, yields a warning.
    2. If the endpoint has a response schema, the path must be declared in it;
       otherwise a warning is added.

    Args:
        endpoint: Endpoint definition with meta_extractor and response_schema
        cassette_path: Path to cassette file
        entity_name: Entity name (for messages)
        action: Action name (for messages)

    Returns:
        Tuple of (is_valid, errors, warnings)
        - is_valid: Always True (meta-extractor findings are non-blocking)
        - errors: Always empty
        - warnings: List of warning messages
    """
    errors: List[str] = []
    warnings: List[str] = []

    extractors = endpoint.meta_extractor
    if not extractors:
        return True, errors, warnings

    try:
        # The cassette supplies the real response to probe.
        recorded = load_test_spec(cassette_path, auth_config={})
        body = recorded.captured_response.body

        for field_name, jsonpath_expr in extractors.items():
            # Check 1: does the JSONPath select anything in the recorded response?
            try:
                compiled = parse_jsonpath(jsonpath_expr)
                found_values = [hit.value for hit in compiled.find(body)]
            except Exception as e:
                warnings.append(
                    f"{entity_name}.{action}: x-airbyte-meta-extractor field '{field_name}' has invalid JSONPath '{jsonpath_expr}': {str(e)}"
                )
            else:
                if not found_values:
                    warnings.append(
                        f"{entity_name}.{action}: x-airbyte-meta-extractor field '{field_name}' "
                        f"with JSONPath '{jsonpath_expr}' found no matches in cassette response"
                    )

            # Check 2: is the path declared in the response schema?
            declared_schema = endpoint.response_schema
            if declared_schema and not _check_field_in_schema(jsonpath_expr, declared_schema):
                warnings.append(
                    f"{entity_name}.{action}: x-airbyte-meta-extractor field '{field_name}' "
                    f"extracts from '{jsonpath_expr}' but this path is not declared in response schema"
                )

    except Exception as e:
        # Loading the cassette (or anything unexpected) failed: report, don't block.
        warnings.append(f"{entity_name}.{action}: Error validating x-airbyte-meta-extractor: {str(e)}")

    return True, errors, warnings
519
+
520
+
521
+ def validate_connector_readiness(connector_dir: str | Path) -> Dict[str, Any]:
522
+ """
523
+ Validate that a connector is ready to ship.
524
+
525
+ Checks that:
526
+ - connector.yaml exists and is valid
527
+ - For each entity/action defined, corresponding cassette(s) exist
528
+ - Response schemas in connector.yaml match cassette responses
529
+ - Detects fields in responses that are not declared in the schema (as warnings)
530
+
531
+ Args:
532
+ connector_dir: Path to the connector directory (e.g., "/path/to/integrations/stripe")
533
+
534
+ Returns:
535
+ Dict with validation results including:
536
+ - success: Overall success status
537
+ - connector_name: Name of the connector
538
+ - validation_results: List of results for each entity/action
539
+ - summary: Summary statistics
540
+
541
+ Each validation result includes:
542
+ - warnings: Human-readable warnings (e.g., "Undeclared field in response: data[].extra_field")
543
+ - errors: Actual schema validation errors (e.g., missing required fields, type mismatches)
544
+
545
+ Note: Undeclared fields are surfaced as warnings, not errors. This allows connectors
546
+ with dynamic/flexible schemas (like custom objects) to pass validation while still
547
+ highlighting fields that could be added to the schema. Non-dynamic schemas are expected
548
+ to have all fields in the schema.
549
+
550
+ Example:
551
+ validate_connector_readiness("/path/to/integrations/stripe")
552
+ """
553
+ connector_path = Path(connector_dir)
554
+
555
+ if not connector_path.exists():
556
+ return {
557
+ "success": False,
558
+ "error": f"Connector directory not found: {connector_dir}",
559
+ }
560
+
561
+ config_file = connector_path / "connector.yaml"
562
+ if not config_file.exists():
563
+ return {
564
+ "success": False,
565
+ "error": f"connector.yaml not found in {connector_dir}",
566
+ }
567
+
568
+ try:
569
+ config = load_connector_model(config_file)
570
+ except ConnectorModelLoaderError as e:
571
+ return {"success": False, "error": f"Failed to load connector.yaml: {str(e)}"}
572
+
573
+ # Load the raw spec for extractor validation
574
+ try:
575
+ with open(config_file) as f:
576
+ raw_spec = yaml.safe_load(f)
577
+ except Exception:
578
+ raw_spec = {}
579
+
580
+ cassettes_dir = connector_path / "tests" / "cassettes"
581
+ cassette_map = build_cassette_map(cassettes_dir)
582
+
583
+ validation_results = []
584
+ total_operations = 0
585
+ operations_with_cassettes = 0
586
+ operations_missing_cassettes = 0
587
+ total_cassettes = 0
588
+ cassettes_valid = 0
589
+ cassettes_invalid = 0
590
+ total_warnings = 0
591
+ total_errors = 0
592
+
593
+ for entity in config.entities:
594
+ for action in entity.actions:
595
+ total_operations += 1
596
+
597
+ key = (entity.name, action.value)
598
+ cassette_paths = cassette_map.get(key, [])
599
+
600
+ endpoint = entity.endpoints[action]
601
+ # Check if this is a download action
602
+ is_download = action == Action.DOWNLOAD
603
+ # Check if operation is marked as untested
604
+ is_untested = endpoint.untested
605
+
606
+ if not cassette_paths:
607
+ # For untested operations, add a warning instead of an error
608
+ if is_untested:
609
+ total_warnings += 1
610
+ validation_results.append(
611
+ {
612
+ "entity": entity.name,
613
+ "action": action.value,
614
+ "cassettes_found": 0,
615
+ "cassette_paths": [],
616
+ "schema_defined": endpoint.response_schema is not None,
617
+ "is_download": is_download,
618
+ "untested": True,
619
+ "schema_validation": [],
620
+ "warnings": [
621
+ f"Operation {entity.name}.{action.value} is marked as untested "
622
+ f"(x-airbyte-untested: true) and has no cassettes. Validation skipped."
623
+ ],
624
+ }
625
+ )
626
+ continue
627
+
628
+ # For tested operations, this is an error
629
+ operations_missing_cassettes += 1
630
+ validation_results.append(
631
+ {
632
+ "entity": entity.name,
633
+ "action": action.value,
634
+ "cassettes_found": 0,
635
+ "cassette_paths": [],
636
+ "schema_defined": endpoint.response_schema is not None,
637
+ "is_download": is_download,
638
+ "schema_validation": [],
639
+ }
640
+ )
641
+ continue
642
+
643
+ operations_with_cassettes += 1
644
+ total_cassettes += len(cassette_paths)
645
+
646
+ response_schema = endpoint.response_schema
647
+ schema_defined = response_schema is not None
648
+
649
+ # Validate x-airbyte-record-extractor (once per endpoint, not per cassette)
650
+ if endpoint.record_extractor and not is_download:
651
+ is_valid, extractor_errors, extractor_warnings = validate_record_extractor_has_ref(
652
+ endpoint=endpoint,
653
+ raw_spec=raw_spec,
654
+ entity_name=entity.name,
655
+ action=action.value,
656
+ )
657
+
658
+ if not is_valid:
659
+ cassettes_invalid += len(cassette_paths)
660
+ total_errors += len(extractor_errors) * len(cassette_paths)
661
+
662
+ total_warnings += len(extractor_warnings)
663
+
664
+ # If record extractor validation fails, add error to all cassettes for this endpoint
665
+ if not is_valid:
666
+ schema_validation = [
667
+ {
668
+ "cassette": str(p.name),
669
+ "valid": False,
670
+ "errors": extractor_errors,
671
+ "warnings": extractor_warnings,
672
+ }
673
+ for p in cassette_paths
674
+ ]
675
+ validation_results.append(
676
+ {
677
+ "entity": entity.name,
678
+ "action": action.value,
679
+ "cassettes_found": len(cassette_paths),
680
+ "cassette_paths": [str(p.name) for p in cassette_paths],
681
+ "schema_defined": schema_defined,
682
+ "is_download": is_download,
683
+ "schema_validation": schema_validation,
684
+ }
685
+ )
686
+ continue
687
+
688
+ schema_validation = []
689
+ for cassette_path in cassette_paths:
690
+ try:
691
+ spec = load_test_spec(cassette_path, auth_config={})
692
+
693
+ # For download actions, validate that captured_file_request/response exist
694
+ if is_download:
695
+ has_file_request = hasattr(spec, "captured_file_request") and spec.captured_file_request is not None
696
+ has_file_response = hasattr(spec, "captured_file_response") and spec.captured_file_response is not None
697
+
698
+ if has_file_request and has_file_response:
699
+ cassettes_valid += 1
700
+ schema_validation.append(
701
+ {
702
+ "cassette": str(cassette_path.name),
703
+ "valid": True,
704
+ "errors": [],
705
+ "warnings": [],
706
+ }
707
+ )
708
+ else:
709
+ cassettes_invalid += 1
710
+ total_errors += 1
711
+ errors = []
712
+ if not has_file_request:
713
+ errors.append("Missing captured_file_request for download action")
714
+ elif not has_file_response:
715
+ errors.append("Missing captured_file_response for download action")
716
+ schema_validation.append(
717
+ {
718
+ "cassette": str(cassette_path.name),
719
+ "valid": False,
720
+ "errors": errors,
721
+ "warnings": [],
722
+ }
723
+ )
724
+ continue
725
+
726
+ # For non-download actions, validate response schema
727
+ response_body = spec.captured_response.body
728
+
729
+ if response_schema:
730
+ is_valid, errors = validate_response_against_schema(response_body, response_schema)
731
+
732
+ undeclared_fields = find_undeclared_fields(response_body, response_schema)
733
+
734
+ warnings = []
735
+ if undeclared_fields:
736
+ warnings = [f"Undeclared field in response: {field}" for field in undeclared_fields]
737
+
738
+ # Validate x-airbyte-meta-extractor fields
739
+ if endpoint.meta_extractor:
740
+ (
741
+ _,
742
+ meta_errors,
743
+ meta_warnings,
744
+ ) = validate_meta_extractor_fields(
745
+ endpoint=endpoint,
746
+ cassette_path=cassette_path,
747
+ entity_name=entity.name,
748
+ action=action.value,
749
+ )
750
+ warnings.extend(meta_warnings)
751
+ # Meta extractor errors are also treated as warnings (non-blocking)
752
+ warnings.extend(meta_errors)
753
+
754
+ if is_valid:
755
+ cassettes_valid += 1
756
+ else:
757
+ cassettes_invalid += 1
758
+
759
+ total_warnings += len(warnings)
760
+ total_errors += len(errors)
761
+
762
+ schema_validation.append(
763
+ {
764
+ "cassette": str(cassette_path.name),
765
+ "valid": is_valid,
766
+ "errors": errors,
767
+ "warnings": warnings,
768
+ }
769
+ )
770
+ else:
771
+ total_errors += 1
772
+ schema_validation.append(
773
+ {
774
+ "cassette": str(cassette_path.name),
775
+ "valid": None,
776
+ "errors": ["No response schema defined in connector.yaml"],
777
+ "warnings": [],
778
+ }
779
+ )
780
+
781
+ except Exception as e:
782
+ cassettes_invalid += 1
783
+ total_errors += 1
784
+ schema_validation.append(
785
+ {
786
+ "cassette": str(cassette_path.name),
787
+ "valid": False,
788
+ "errors": [f"Failed to load/validate cassette: {str(e)}"],
789
+ "warnings": [],
790
+ }
791
+ )
792
+
793
+ validation_results.append(
794
+ {
795
+ "entity": entity.name,
796
+ "action": action.value,
797
+ "cassettes_found": len(cassette_paths),
798
+ "cassette_paths": [str(p.name) for p in cassette_paths],
799
+ "schema_defined": schema_defined,
800
+ "is_download": is_download,
801
+ "schema_validation": schema_validation,
802
+ }
803
+ )
804
+
805
+ success = operations_missing_cassettes == 0 and cassettes_invalid == 0 and total_operations > 0
806
+
807
+ return {
808
+ "success": success,
809
+ "connector_name": config.name,
810
+ "connector_path": str(connector_path),
811
+ "validation_results": validation_results,
812
+ "summary": {
813
+ "total_operations": total_operations,
814
+ "operations_with_cassettes": operations_with_cassettes,
815
+ "operations_missing_cassettes": operations_missing_cassettes,
816
+ "total_cassettes": total_cassettes,
817
+ "cassettes_valid": cassettes_valid,
818
+ "cassettes_invalid": cassettes_invalid,
819
+ "total_warnings": total_warnings,
820
+ "total_errors": total_errors,
821
+ },
822
+ }