airbyte-agent-facebook-marketing 0.1.2__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,970 @@
1
+ """
2
+ Replication compatibility validation for Airbyte connectors.
3
+
4
+ Validates that connector.yaml replication mappings reference valid fields
5
+ in the Airbyte source connector's spec from the registry.
6
+ """
7
+
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import httpx
12
+ import yaml
13
+
14
# URL template for a source connector's cloud registry metadata document.
# "{name}" is filled in with the connector name via str.format() in
# fetch_airbyte_registry_metadata().
REGISTRY_URL = "https://connectors.airbyte.com/files/metadata/airbyte/source-{name}/latest/cloud.json"
15
+
16
+
17
def fetch_airbyte_registry_metadata(connector_name: str) -> dict[str, Any] | None:
    """Download a connector's metadata document from the Airbyte cloud registry.

    Args:
        connector_name: Name from x-airbyte-connector-name (e.g., "zendesk-support", "github")

    Returns:
        The decoded JSON body when the registry answers with HTTP 200.
        None for any other status code, for an httpx transport error, or
        when the body cannot be decoded.
    """
    registry_url = REGISTRY_URL.format(name=connector_name)

    try:
        reply = httpx.get(registry_url, timeout=15.0)
        payload = reply.json() if reply.status_code == 200 else None
    except (httpx.HTTPError, ValueError):
        # Deliberately best-effort: an unreachable registry is reported to
        # callers as "not found" rather than as an exception.
        payload = None
    return payload
35
+
36
+
37
+ def _get_available_paths_at_level(spec: dict[str, Any], prefix: str = "") -> list[str]:
38
+ """Get list of available property paths at a given level in the spec.
39
+
40
+ Used for helpful error messages showing what paths are available.
41
+ """
42
+ paths = []
43
+ properties = spec.get("properties", {})
44
+
45
+ for key in properties:
46
+ full_path = f"{prefix}.{key}" if prefix else key
47
+ paths.append(full_path)
48
+
49
+ # Also check oneOf variants
50
+ for one_of_item in spec.get("oneOf", []):
51
+ for key in one_of_item.get("properties", {}):
52
+ full_path = f"{prefix}.{key}" if prefix else key
53
+ if full_path not in paths:
54
+ paths.append(full_path)
55
+
56
+ return sorted(paths)
57
+
58
+
59
+ def resolve_spec_path(spec: dict[str, Any], path: str) -> tuple[bool, str | None]:
60
+ """Check if a dotted path resolves in the spec.
61
+
62
+ Handles nested structures including:
63
+ - Simple properties: "start_date" -> properties.start_date
64
+ - Nested paths: "credentials.access_token" -> properties.credentials.*.properties.access_token
65
+ - oneOf structures: Searches all oneOf variants
66
+
67
+ Args:
68
+ spec: The connectionSpecification from registry
69
+ path: Dotted path like "credentials.access_token"
70
+
71
+ Returns:
72
+ (found: bool, error_detail: str | None)
73
+ """
74
+ if not path:
75
+ return False, "Empty path"
76
+
77
+ parts = path.split(".")
78
+ current = spec
79
+
80
+ for i, part in enumerate(parts):
81
+ properties = current.get("properties", {})
82
+
83
+ if part in properties:
84
+ current = properties[part]
85
+ continue
86
+
87
+ # Check oneOf variants
88
+ one_of = current.get("oneOf", [])
89
+ found_in_one_of = False
90
+ for variant in one_of:
91
+ variant_props = variant.get("properties", {})
92
+ if part in variant_props:
93
+ current = variant_props[part]
94
+ found_in_one_of = True
95
+ break
96
+
97
+ if found_in_one_of:
98
+ continue
99
+
100
+ # Not found - build helpful error message
101
+ current_path = ".".join(parts[:i]) if i > 0 else "(root)"
102
+ available = _get_available_paths_at_level(current, ".".join(parts[:i]) if i > 0 else "")
103
+ available_str = ", ".join(available[:10]) if available else "(none)"
104
+ if len(available) > 10:
105
+ available_str += f", ... ({len(available) - 10} more)"
106
+
107
+ return False, f"Path segment '{part}' not found at {current_path}. Available: {available_str}"
108
+
109
+ return True, None
110
+
111
+
112
+ def validate_connector_id(
113
+ connector_id: str,
114
+ connector_name: str,
115
+ registry_metadata: dict[str, Any] | None,
116
+ ) -> tuple[bool, list[str], list[str], bool]:
117
+ """Validate connector ID matches registry.
118
+
119
+ Args:
120
+ connector_id: The x-airbyte-connector-id from connector.yaml
121
+ connector_name: The x-airbyte-connector-name from connector.yaml
122
+ registry_metadata: Fetched registry metadata or None
123
+
124
+ Returns:
125
+ (is_valid, errors, warnings, skip_remaining_checks)
126
+ - is_valid: True if no blocking errors
127
+ - errors: List of error messages
128
+ - warnings: List of warning messages
129
+ - skip_remaining_checks: True if remaining replication checks should be skipped
130
+ """
131
+ errors = []
132
+ warnings = []
133
+
134
+ if registry_metadata is None:
135
+ # Connector not in registry - warn but don't fail (could be new connector)
136
+ warnings.append(f"Connector '{connector_name}' not found in Airbyte registry. " f"Skipping replication compatibility checks.")
137
+ return True, errors, warnings, True # Valid (no blocking error), but skip remaining checks
138
+
139
+ registry_id = registry_metadata.get("sourceDefinitionId", "")
140
+
141
+ if connector_id.lower() != registry_id.lower():
142
+ errors.append(
143
+ f"Connector ID mismatch: connector.yaml has '{connector_id}' but " f"Airbyte registry has '{registry_id}' for '{connector_name}'."
144
+ )
145
+ return False, errors, warnings, True # Invalid, skip remaining checks
146
+
147
+ return True, errors, warnings, False # Valid, continue with checks
148
+
149
+
150
def validate_auth_key_mapping(
    auth_mappings: dict[str, str],
    spec: dict[str, Any],
    scheme_name: str,
) -> tuple[bool, list[str], list[str]]:
    """Check that every key of a replication_auth_key_mapping resolves in the spec.

    Args:
        auth_mappings: Dict like {"credentials.access_token": "access_token"};
            only the keys (spec paths) are checked
        spec: connectionSpecification from registry
        scheme_name: Name of the security scheme (for error messages)

    Returns:
        (is_valid, errors, warnings); warnings is always empty here
    """
    failures: list[str] = []

    for spec_path, _unused_target in auth_mappings.items():
        resolved, detail = resolve_spec_path(spec, spec_path)
        if resolved:
            continue
        failures.append(f"replication_auth_key_mapping in '{scheme_name}': path '{spec_path}' not found in Airbyte spec. {detail}")

    notices: list[str] = []
    return not failures, failures, notices
174
+
175
+
176
def validate_config_key_mapping(
    config_mappings: dict[str, str],
    spec: dict[str, Any],
) -> tuple[bool, list[str], list[str]]:
    """Check that every value of replication_config_key_mapping resolves in the spec.

    Args:
        config_mappings: Dict like {"start_date": "start_date"}; only the
            values (target spec paths) are checked
        spec: connectionSpecification from registry

    Returns:
        (is_valid, errors, warnings); warnings is always empty here
    """
    failures: list[str] = []

    for spec_path in config_mappings.values():
        resolved, detail = resolve_spec_path(spec, spec_path)
        if resolved:
            continue
        failures.append(f"replication_config_key_mapping: target path '{spec_path}' not found in Airbyte spec. {detail}")

    notices: list[str] = []
    return not failures, failures, notices
198
+
199
+
200
def validate_environment_mapping(
    env_mappings: dict[str, Any],
    spec: dict[str, Any],
) -> tuple[bool, list[str], list[str]]:
    """Check that x-airbyte-replication-environment-mapping targets resolve in the spec.

    Args:
        env_mappings: Dict like {"subdomain": "subdomain"} or
            {"domain": {"source": "subdomain", "format": "..."}}
        spec: connectionSpecification from registry

    Returns:
        (is_valid, errors, warnings). A mapping value that is neither a
        string nor a dict produces a warning and is not checked.
    """
    failures: list[str] = []
    notices: list[str] = []

    for env_key, mapping in env_mappings.items():
        if isinstance(mapping, dict):
            # Transform mapping: the spec field checked is the one named
            # after the environment key itself.
            spec_path = env_key
        elif isinstance(mapping, str):
            # Plain mapping: the value is the spec path.
            spec_path = mapping
        else:
            notices.append(f"x-airbyte-replication-environment-mapping: unexpected mapping type for '{env_key}': {type(mapping)}")
            continue

        resolved, detail = resolve_spec_path(spec, spec_path)
        if not resolved:
            failures.append(f"x-airbyte-replication-environment-mapping: target path '{spec_path}' not found in Airbyte spec. {detail}")

    return not failures, failures, notices
238
+
239
+
240
+ def validate_suggested_streams_coverage(
241
+ connector_entities: list[dict[str, str | None]],
242
+ suggested_streams: list[str],
243
+ skip_streams: list[str] | None = None,
244
+ ) -> tuple[bool, list[str], list[str]]:
245
+ """Check connector entities cover all suggested streams.
246
+
247
+ Args:
248
+ connector_entities: List of entity dicts with 'name' and optional 'stream_name'
249
+ (from x-airbyte-entity and x-airbyte-stream-name)
250
+ suggested_streams: List of stream names from registry
251
+ skip_streams: Optional list of stream names to skip validation for
252
+
253
+ Returns:
254
+ (is_valid, errors, warnings)
255
+ """
256
+ errors = []
257
+ warnings = []
258
+ skip_streams = skip_streams or []
259
+
260
+ if not suggested_streams:
261
+ # No suggested streams in registry - nothing to validate
262
+ return True, errors, warnings
263
+
264
+ # Build set of covered stream names from connector entities
265
+ covered_streams: set[str] = set()
266
+ for entity in connector_entities:
267
+ entity_name = entity.get("name", "")
268
+ if entity_name:
269
+ covered_streams.add(entity_name)
270
+ # x-airbyte-stream-name overrides entity name for stream matching
271
+ stream_name = entity.get("stream_name")
272
+ if stream_name:
273
+ covered_streams.add(stream_name)
274
+
275
+ # Check each suggested stream is covered (excluding skipped ones)
276
+ missing_streams = []
277
+ skipped_streams = []
278
+ for stream in suggested_streams:
279
+ if stream in skip_streams:
280
+ skipped_streams.append(stream)
281
+ elif stream not in covered_streams:
282
+ missing_streams.append(stream)
283
+
284
+ if skipped_streams:
285
+ warnings.append(f"Skipped suggested streams (via x-airbyte-skip-suggested-streams): {', '.join(skipped_streams)}")
286
+
287
+ if missing_streams:
288
+ errors.append(
289
+ f"Suggested streams not covered by connector entities: {', '.join(missing_streams)}. "
290
+ f"Add entities with matching x-airbyte-entity names or x-airbyte-stream-name attributes, "
291
+ f"or add to x-airbyte-skip-suggested-streams to skip."
292
+ )
293
+
294
+ return len(errors) == 0, errors, warnings
295
+
296
+
297
+ def _extract_auth_mappings_from_spec(raw_spec: dict[str, Any]) -> dict[str, dict[str, str]]:
298
+ """Extract all replication_auth_key_mapping from security schemes.
299
+
300
+ Returns:
301
+ Dict mapping scheme_name -> auth_mappings
302
+ """
303
+ result = {}
304
+ security_schemes = raw_spec.get("components", {}).get("securitySchemes", {})
305
+
306
+ for scheme_name, scheme_def in security_schemes.items():
307
+ auth_config = scheme_def.get("x-airbyte-auth-config", {})
308
+ auth_mappings = auth_config.get("replication_auth_key_mapping", {})
309
+ if auth_mappings:
310
+ result[scheme_name] = auth_mappings
311
+
312
+ return result
313
+
314
+
315
+ def _extract_config_mappings_from_spec(raw_spec: dict[str, Any]) -> dict[str, str]:
316
+ """Extract replication_config_key_mapping from info section."""
317
+ replication_config = raw_spec.get("info", {}).get("x-airbyte-replication-config", {})
318
+ return replication_config.get("replication_config_key_mapping", {})
319
+
320
+
321
+ def _extract_environment_mappings_from_spec(raw_spec: dict[str, Any]) -> dict[str, Any]:
322
+ """Extract x-airbyte-replication-environment-mapping from servers."""
323
+ servers = raw_spec.get("servers", [])
324
+ result = {}
325
+
326
+ for server in servers:
327
+ env_mapping = server.get("x-airbyte-replication-environment-mapping", {})
328
+ result.update(env_mapping)
329
+
330
+ return result
331
+
332
+
333
+ def _extract_cache_entities_from_spec(raw_spec: dict[str, Any]) -> list[dict[str, Any]]:
334
+ """Extract x-airbyte-cache entities from info section."""
335
+ cache_config = raw_spec.get("info", {}).get("x-airbyte-cache", {})
336
+ return cache_config.get("entities", [])
337
+
338
+
339
+ def _extract_connector_entities_from_spec(raw_spec: dict[str, Any]) -> list[dict[str, str | None]]:
340
+ """Extract entities from connector spec paths/operations.
341
+
342
+ Entities are defined via x-airbyte-entity on operations. Each entity can have
343
+ an optional x-airbyte-stream-name on its response schema that maps to the
344
+ Airbyte stream name.
345
+
346
+ Stream name resolution order:
347
+ 1. x-airbyte-stream-name on schema (explicit stream name)
348
+ 2. Schema name when x-airbyte-entity-name points to an entity (e.g., Account schema with x-airbyte-entity-name: accounts)
349
+ 3. Entity name itself (fallback)
350
+
351
+ Returns:
352
+ List of dicts with 'name' (entity name) and 'stream_name' (optional stream name)
353
+ """
354
+ entities: dict[str, str | None] = {} # entity_name -> stream_name
355
+
356
+ paths = raw_spec.get("paths", {})
357
+ schemas = raw_spec.get("components", {}).get("schemas", {})
358
+
359
+ # First pass: collect all entity names from operations
360
+ for _path, path_item in paths.items():
361
+ for method in ["get", "post", "put", "patch", "delete", "options", "head", "trace"]:
362
+ operation = path_item.get(method) if isinstance(path_item, dict) else None
363
+ if not operation:
364
+ continue
365
+
366
+ entity_name = operation.get("x-airbyte-entity")
367
+ if entity_name and entity_name not in entities:
368
+ entities[entity_name] = None
369
+
370
+ # Second pass: look for x-airbyte-stream-name or x-airbyte-entity-name in schemas
371
+ for schema_name, schema_def in schemas.items():
372
+ if not isinstance(schema_def, dict):
373
+ continue
374
+
375
+ stream_name = schema_def.get("x-airbyte-stream-name")
376
+ entity_name_attr = schema_def.get("x-airbyte-entity-name")
377
+
378
+ if stream_name:
379
+ # Explicit x-airbyte-stream-name takes precedence
380
+ if entity_name_attr and entity_name_attr in entities:
381
+ entities[entity_name_attr] = stream_name
382
+ elif schema_name.lower() in entities:
383
+ entities[schema_name.lower()] = stream_name
384
+ else:
385
+ for ent_name in entities:
386
+ if ent_name.lower() == schema_name.lower():
387
+ entities[ent_name] = stream_name
388
+ break
389
+ elif entity_name_attr and entity_name_attr in entities:
390
+ # No x-airbyte-stream-name, but x-airbyte-entity-name maps to an entity
391
+ # Use schema name as the stream name (e.g., Account schema for accounts entity)
392
+ if entities[entity_name_attr] is None:
393
+ entities[entity_name_attr] = schema_name
394
+
395
+ return [{"name": name, "stream_name": stream_name} for name, stream_name in entities.items()]
396
+
397
+
398
+ def _extract_skip_suggested_streams_from_spec(raw_spec: dict[str, Any]) -> list[str]:
399
+ """Extract x-airbyte-skip-suggested-streams from info section."""
400
+ return raw_spec.get("info", {}).get("x-airbyte-skip-suggested-streams", [])
401
+
402
+
403
+ def _extract_skip_auth_methods_from_spec(raw_spec: dict[str, Any]) -> list[str]:
404
+ """Extract x-airbyte-skip-auth-methods from info section."""
405
+ return raw_spec.get("info", {}).get("x-airbyte-skip-auth-methods", [])
406
+
407
+
408
+ # ============================================
409
+ # AUTH METHOD VALIDATION
410
+ # ============================================
411
+
412
# URL template for a source connector's manifest.yaml on the master branch of
# the airbytehq/airbyte GitHub repository. "{name}" is filled in via
# str.format() in fetch_airbyte_manifest().
MANIFEST_URL = "https://raw.githubusercontent.com/airbytehq/airbyte/refs/heads/master/airbyte-integrations/connectors/source-{name}/manifest.yaml"
413
+
414
+
415
+ def _resolve_manifest_refs(obj: Any, root: dict[str, Any]) -> Any:
416
+ """Recursively resolve $ref and string references in a manifest.
417
+
418
+ Handles both:
419
+ - Dict refs: {"$ref": "#/definitions/foo"}
420
+ - String refs: "#/definitions/foo"
421
+ """
422
+ if isinstance(obj, dict):
423
+ if "$ref" in obj and len(obj) == 1:
424
+ ref_path = obj["$ref"]
425
+ if ref_path.startswith("#/"):
426
+ parts = ref_path[2:].split("/")
427
+ resolved = root
428
+ for part in parts:
429
+ resolved = resolved.get(part, {})
430
+ return _resolve_manifest_refs(resolved, root)
431
+ return obj
432
+ return {k: _resolve_manifest_refs(v, root) for k, v in obj.items()}
433
+ elif isinstance(obj, list):
434
+ return [_resolve_manifest_refs(item, root) for item in obj]
435
+ elif isinstance(obj, str) and obj.startswith("#/definitions/"):
436
+ parts = obj[2:].split("/")
437
+ resolved = root
438
+ for part in parts:
439
+ resolved = resolved.get(part, {})
440
+ return _resolve_manifest_refs(resolved, root)
441
+ return obj
442
+
443
+
444
def fetch_airbyte_manifest(connector_name: str) -> dict[str, Any] | None:
    """Download and parse a connector's manifest.yaml from the Airbyte GitHub repo.

    The connector name is lower-cased and underscores/spaces are replaced by
    hyphens before it is substituted into MANIFEST_URL.

    Args:
        connector_name: Name like "gong" or "hubspot"

    Returns:
        Parsed manifest with refs resolved on HTTP 200. None for any other
        status code, an httpx transport error, or a YAML parse error.
    """
    # One pass that maps both "_" and " " to "-".
    slug = connector_name.lower().translate(str.maketrans("_ ", "--"))
    manifest_url = MANIFEST_URL.format(name=slug)

    try:
        reply = httpx.get(manifest_url, timeout=15.0)
        if reply.status_code != 200:
            return None
        parsed = yaml.safe_load(reply.text)
        # The manifest is its own root for pointer lookups.
        return _resolve_manifest_refs(parsed, parsed)
    except (httpx.HTTPError, ValueError, yaml.YAMLError):
        # Best effort: callers treat None as "no manifest available".
        return None
465
+
466
+
467
+ def _normalize_auth_type(auth_type: str) -> str:
468
+ """Normalize auth type names to canonical form.
469
+
470
+ Maps various naming conventions to: oauth2, bearer, basic, api_key
471
+ """
472
+ auth_type_lower = auth_type.lower()
473
+
474
+ # OAuth variations
475
+ if "oauth" in auth_type_lower:
476
+ return "oauth2"
477
+
478
+ # Bearer token variations
479
+ if "bearer" in auth_type_lower or auth_type_lower == "bearerauth":
480
+ return "bearer"
481
+
482
+ # Basic auth variations
483
+ if "basic" in auth_type_lower:
484
+ return "basic"
485
+
486
+ # API key variations
487
+ if "api" in auth_type_lower and "key" in auth_type_lower:
488
+ return "api_key"
489
+ if auth_type_lower == "apikeyauthenticator":
490
+ return "api_key"
491
+
492
+ return auth_type_lower
493
+
494
+
495
+ def _extract_auth_types_from_manifest(manifest: dict[str, Any]) -> tuple[set[str], dict[str, str]]:
496
+ """Extract auth types from a resolved Airbyte manifest.
497
+
498
+ Looks for the authenticator used by the connector in:
499
+ - definitions.base_requester.authenticator
500
+ - definitions.retriever.requester.authenticator
501
+
502
+ The manifest should already have $ref references resolved.
503
+
504
+ Returns:
505
+ Tuple of:
506
+ - Set of normalized auth types (e.g., {"oauth2", "bearer"})
507
+ - Dict mapping auth type to SelectiveAuthenticator option key for display
508
+ (e.g., {"bearer": "Private App Credentials"})
509
+ """
510
+ auth_types: set[str] = set()
511
+ auth_option_keys: dict[str, str] = {} # Maps auth type -> SelectiveAuthenticator key
512
+
513
+ defs = manifest.get("definitions", {})
514
+
515
+ # Find the authenticator - could be in base_requester or retriever.requester
516
+ authenticator = None
517
+
518
+ if "base_requester" in defs:
519
+ authenticator = defs["base_requester"].get("authenticator")
520
+
521
+ if not authenticator and "retriever" in defs:
522
+ requester = defs["retriever"].get("requester", {})
523
+ authenticator = requester.get("authenticator")
524
+
525
+ if authenticator:
526
+ _extract_auth_from_authenticator(authenticator, auth_types, auth_option_keys, defs)
527
+
528
+ return auth_types, auth_option_keys
529
+
530
+
531
+ def _extract_auth_from_authenticator(
532
+ authenticator: dict[str, Any],
533
+ auth_types: set[str],
534
+ auth_option_keys: dict[str, str],
535
+ defs: dict[str, Any],
536
+ option_key: str | None = None,
537
+ ) -> None:
538
+ """Extract auth types from an authenticator definition.
539
+
540
+ Handles:
541
+ - Single authenticators (e.g., BearerAuthenticator, ApiKeyAuthenticator)
542
+ - SelectiveAuthenticator with multiple options
543
+ - $ref references to other definitions
544
+
545
+ Args:
546
+ authenticator: The authenticator definition
547
+ auth_types: Set to add found auth types to
548
+ auth_option_keys: Dict to add auth type -> SelectiveAuthenticator option key mappings
549
+ defs: The definitions dict for resolving refs
550
+ option_key: The SelectiveAuthenticator option key (e.g., "Private App Credentials")
551
+ """
552
+ # Handle $ref references that weren't fully resolved
553
+ if "$ref" in authenticator and len(authenticator) == 1:
554
+ ref_path = authenticator["$ref"]
555
+ if ref_path.startswith("#/definitions/"):
556
+ ref_name = ref_path.split("/")[-1]
557
+ authenticator = defs.get(ref_name, {})
558
+
559
+ auth_type = authenticator.get("type", "")
560
+
561
+ if auth_type == "SelectiveAuthenticator":
562
+ # Multiple auth options - extract from each, passing the option key
563
+ authenticators = authenticator.get("authenticators", {})
564
+ for key, auth_def in authenticators.items():
565
+ if isinstance(auth_def, dict):
566
+ _extract_auth_from_authenticator(auth_def, auth_types, auth_option_keys, defs, key)
567
+ elif isinstance(auth_def, str) and auth_def.startswith("#/definitions/"):
568
+ # String reference
569
+ ref_name = auth_def.split("/")[-1]
570
+ ref_def = defs.get(ref_name, {})
571
+ if ref_def:
572
+ _extract_auth_from_authenticator(ref_def, auth_types, auth_option_keys, defs, key)
573
+ elif auth_type:
574
+ normalized = _normalize_auth_type(auth_type)
575
+ auth_types.add(normalized)
576
+ # Store the option key if provided and not already set
577
+ if option_key and normalized not in auth_option_keys:
578
+ auth_option_keys[normalized] = option_key
579
+
580
+
581
+ def _extract_auth_types_from_registry(registry_metadata: dict[str, Any]) -> set[str]:
582
+ """Extract auth types from Airbyte registry metadata.
583
+
584
+ Only extracts OAuth from the registry since it's the only reliable indicator.
585
+ The registry's credential property names are ambiguous (e.g., "access_token"
586
+ could be OAuth, bearer token, or API key depending on context).
587
+
588
+ For non-OAuth auth types, use _extract_auth_types_from_manifest() which has
589
+ explicit authenticator type declarations.
590
+ """
591
+ auth_types: set[str] = set()
592
+
593
+ spec = registry_metadata.get("spec", {})
594
+
595
+ # Check advanced_auth for OAuth - this is the only reliable indicator from registry
596
+ advanced_auth = spec.get("advanced_auth", {})
597
+ if advanced_auth.get("auth_flow_type") == "oauth2.0":
598
+ auth_types.add("oauth2")
599
+
600
+ return auth_types
601
+
602
+
603
+ def _extract_auth_types_from_connector(raw_spec: dict[str, Any]) -> set[str]:
604
+ """Extract auth types from our connector.yaml.
605
+
606
+ Looks at components.securitySchemes.
607
+ """
608
+ auth_types: set[str] = set()
609
+
610
+ security_schemes = raw_spec.get("components", {}).get("securitySchemes", {})
611
+
612
+ for _name, scheme in security_schemes.items():
613
+ scheme_type = scheme.get("type", "")
614
+
615
+ if scheme_type == "oauth2":
616
+ auth_types.add("oauth2")
617
+ elif scheme_type == "http":
618
+ http_scheme = scheme.get("scheme", "").lower()
619
+ if http_scheme == "bearer":
620
+ auth_types.add("bearer")
621
+ elif http_scheme == "basic":
622
+ auth_types.add("basic")
623
+ elif scheme_type == "apiKey":
624
+ auth_types.add("api_key")
625
+
626
+ return auth_types
627
+
628
+
629
def validate_auth_methods(
    raw_spec: dict[str, Any],
    connector_name: str,
    registry_metadata: dict[str, Any] | None,
) -> tuple[bool, list[str], list[str]]:
    """Compare the connector's auth methods with those of the Airbyte connector.

    Strategy:
    1. If a manifest can be fetched, its authenticator defines the expected auth types
    2. Otherwise fall back to registry advanced_auth, which can only reveal OAuth
    3. If neither yields any auth type, validation is skipped with a warning

    Auth types listed in info.x-airbyte-skip-auth-methods are removed from
    the expected set (and reported in a warning). Expected types missing from
    the connector are an error; connector types not expected are a warning.

    Args:
        raw_spec: Our connector.yaml as dict
        connector_name: Connector name for fetching manifest
        registry_metadata: Pre-fetched registry metadata

    Returns:
        (is_valid, errors, warnings)
    """
    problems: list[str] = []
    notices: list[str] = []

    supported = _extract_auth_types_from_connector(raw_spec)
    # Skip entries are already normalized auth types such as "bearer" or "oauth2".
    waived = set(_extract_skip_auth_methods_from_spec(raw_spec))

    manifest = fetch_airbyte_manifest(connector_name)
    expected: set[str] = set()
    option_labels: dict[str, str] = {}  # auth type -> SelectiveAuthenticator option key

    if manifest:
        expected, option_labels = _extract_auth_types_from_manifest(manifest)
    elif registry_metadata:
        expected = _extract_auth_types_from_registry(registry_metadata)

    if not expected:
        notices.append(
            f"Could not determine Airbyte auth types for '{connector_name}' "
            "(no manifest found and no OAuth in registry). Skipping auth validation."
        )
        return True, problems, notices

    def describe(kinds: set[str]) -> str:
        """Render auth types sorted, each with its option key in quotes when known."""
        rendered = []
        for kind in sorted(kinds):
            label = option_labels.get(kind)
            rendered.append(f'{kind} ("{label}")' if label else kind)
        return ", ".join(rendered)

    waived_here = expected & waived
    if waived_here:
        notices.append(f"Skipped auth methods (via x-airbyte-skip-auth-methods): {describe(waived_here)}")
        expected = expected - waived

    missing = expected - supported
    extra = supported - expected

    if missing:
        supported_text = ", ".join(sorted(supported)) if supported else "(none)"
        problems.append(
            f"Missing auth methods: {describe(missing)}. "
            f"Our connector supports: {supported_text}. "
            "Add the missing auth scheme to components.securitySchemes, or if this auth method "
            f"cannot be supported, add to info.x-airbyte-skip-auth-methods: [{', '.join(sorted(missing))}]"
        )

    if extra:
        notices.append(
            f"Extra auth methods in our connector: {', '.join(sorted(extra))}. These are not in Airbyte's connector but may still be valid."
        )

    return not problems, problems, notices
716
+
717
+
718
+ def validate_replication_compatibility(
719
+ connector_yaml_path: str | Path,
720
+ raw_spec: dict[str, Any] | None = None,
721
+ ) -> dict[str, Any]:
722
+ """Validate all replication compatibility aspects.
723
+
724
+ Called from validate_connector_readiness() after basic validation passes.
725
+
726
+ Args:
727
+ connector_yaml_path: Path to connector.yaml
728
+ raw_spec: Pre-loaded raw spec dict (optional, will load from file if not provided)
729
+
730
+ Returns:
731
+ {
732
+ "registry_found": bool,
733
+ "connector_id_matches": bool,
734
+ "checks": [
735
+ {"name": "connector_id", "status": "pass|warn|fail", "messages": [...]},
736
+ {"name": "auth_key_mapping", ...},
737
+ ...
738
+ ],
739
+ "errors": list[str],
740
+ "warnings": list[str]
741
+ }
742
+ """
743
+ connector_path = Path(connector_yaml_path)
744
+
745
+ # Load raw spec if not provided
746
+ if raw_spec is None:
747
+ try:
748
+ with open(connector_path) as f:
749
+ raw_spec = yaml.safe_load(f)
750
+ except Exception as e:
751
+ return {
752
+ "registry_found": False,
753
+ "connector_id_matches": False,
754
+ "checks": [],
755
+ "errors": [f"Failed to load connector.yaml: {str(e)}"],
756
+ "warnings": [],
757
+ }
758
+
759
+ # Extract connector info
760
+ info = raw_spec.get("info", {})
761
+ connector_id = info.get("x-airbyte-connector-id", "")
762
+ connector_name = info.get("x-airbyte-connector-name", "")
763
+
764
+ if not connector_id or not connector_name:
765
+ return {
766
+ "registry_found": False,
767
+ "connector_id_matches": False,
768
+ "checks": [],
769
+ "errors": ["Missing x-airbyte-connector-id or x-airbyte-connector-name in connector.yaml"],
770
+ "warnings": [],
771
+ }
772
+
773
+ # Fetch registry metadata
774
+ registry_metadata = fetch_airbyte_registry_metadata(connector_name)
775
+
776
+ all_errors: list[str] = []
777
+ all_warnings: list[str] = []
778
+ checks: list[dict[str, Any]] = []
779
+
780
+ # Check 1: Connector ID validation
781
+ id_valid, id_errors, id_warnings, skip_remaining = validate_connector_id(connector_id, connector_name, registry_metadata)
782
+ all_errors.extend(id_errors)
783
+ all_warnings.extend(id_warnings)
784
+
785
+ # Determine status: pass if valid, warn if skipping (not found in registry), fail if ID mismatch
786
+ if not id_valid:
787
+ id_status = "fail"
788
+ elif skip_remaining:
789
+ id_status = "skip" # Not in registry, but not an error
790
+ else:
791
+ id_status = "pass"
792
+
793
+ checks.append(
794
+ {
795
+ "name": "connector_id",
796
+ "status": id_status,
797
+ "messages": id_errors + id_warnings,
798
+ }
799
+ )
800
+
801
+ # If connector ID doesn't match or registry not found, skip remaining checks
802
+ if skip_remaining:
803
+ return {
804
+ "registry_found": registry_metadata is not None,
805
+ "connector_id_matches": id_valid and not skip_remaining,
806
+ "checks": checks,
807
+ "errors": all_errors,
808
+ "warnings": all_warnings,
809
+ }
810
+
811
+ # Get the connection spec from registry
812
+ connection_spec = registry_metadata.get("spec", {}).get("connectionSpecification", {})
813
+
814
+ # Check 2: Auth key mappings
815
+ auth_mappings_by_scheme = _extract_auth_mappings_from_spec(raw_spec)
816
+ auth_valid = True
817
+ auth_messages: list[str] = []
818
+
819
+ for scheme_name, auth_mappings in auth_mappings_by_scheme.items():
820
+ valid, errors, warnings = validate_auth_key_mapping(auth_mappings, connection_spec, scheme_name)
821
+ if not valid:
822
+ auth_valid = False
823
+ auth_messages.extend(errors)
824
+ auth_messages.extend(warnings)
825
+ all_errors.extend(errors)
826
+ all_warnings.extend(warnings)
827
+
828
+ checks.append(
829
+ {
830
+ "name": "auth_key_mapping",
831
+ "status": "pass" if auth_valid else "fail",
832
+ "messages": auth_messages,
833
+ }
834
+ )
835
+
836
+ # Check 3: Config key mappings
837
+ config_mappings = _extract_config_mappings_from_spec(raw_spec)
838
+ if config_mappings:
839
+ config_valid, config_errors, config_warnings = validate_config_key_mapping(config_mappings, connection_spec)
840
+ all_errors.extend(config_errors)
841
+ all_warnings.extend(config_warnings)
842
+ checks.append(
843
+ {
844
+ "name": "config_key_mapping",
845
+ "status": "pass" if config_valid else "fail",
846
+ "messages": config_errors + config_warnings,
847
+ }
848
+ )
849
+ else:
850
+ checks.append(
851
+ {
852
+ "name": "config_key_mapping",
853
+ "status": "pass",
854
+ "messages": ["No replication_config_key_mapping defined (skipped)"],
855
+ }
856
+ )
857
+
858
+ # Check 4: Environment mappings
859
+ env_mappings = _extract_environment_mappings_from_spec(raw_spec)
860
+ if env_mappings:
861
+ env_valid, env_errors, env_warnings = validate_environment_mapping(env_mappings, connection_spec)
862
+ all_errors.extend(env_errors)
863
+ all_warnings.extend(env_warnings)
864
+ checks.append(
865
+ {
866
+ "name": "environment_mapping",
867
+ "status": "pass" if env_valid else "fail",
868
+ "messages": env_errors + env_warnings,
869
+ }
870
+ )
871
+ else:
872
+ checks.append(
873
+ {
874
+ "name": "environment_mapping",
875
+ "status": "pass",
876
+ "messages": ["No x-airbyte-replication-environment-mapping defined (skipped)"],
877
+ }
878
+ )
879
+
880
+ # Check 5: Suggested streams coverage (based on entities, not cache)
881
+ connector_entities = _extract_connector_entities_from_spec(raw_spec)
882
+ suggested_streams = registry_metadata.get("suggestedStreams", {}).get("streams", [])
883
+ skip_streams = _extract_skip_suggested_streams_from_spec(raw_spec)
884
+
885
+ if connector_entities:
886
+ streams_valid, streams_errors, streams_warnings = validate_suggested_streams_coverage(connector_entities, suggested_streams, skip_streams)
887
+ all_errors.extend(streams_errors)
888
+ all_warnings.extend(streams_warnings)
889
+ checks.append(
890
+ {
891
+ "name": "suggested_streams_coverage",
892
+ "status": "pass" if streams_valid else "fail",
893
+ "messages": streams_errors + streams_warnings,
894
+ }
895
+ )
896
+ elif suggested_streams:
897
+ # No entities defined but there ARE suggested streams
898
+ # Check if all suggested streams are in skip list
899
+ non_skipped_streams = [s for s in suggested_streams if s not in skip_streams]
900
+ skipped_streams = [s for s in suggested_streams if s in skip_streams]
901
+
902
+ if non_skipped_streams:
903
+ # Some suggested streams are not skipped - this is an error
904
+ error_msg = (
905
+ f"No entities defined, but Airbyte has {len(non_skipped_streams)} suggested streams: "
906
+ f"{', '.join(non_skipped_streams)}. Add entities with matching x-airbyte-entity names, "
907
+ f"or add to x-airbyte-skip-suggested-streams to skip."
908
+ )
909
+ all_errors.append(error_msg)
910
+ messages = [error_msg]
911
+ if skipped_streams:
912
+ skip_msg = f"Skipped suggested streams (via x-airbyte-skip-suggested-streams): {', '.join(skipped_streams)}"
913
+ all_warnings.append(skip_msg)
914
+ messages.append(skip_msg)
915
+ checks.append(
916
+ {
917
+ "name": "suggested_streams_coverage",
918
+ "status": "fail",
919
+ "messages": messages,
920
+ }
921
+ )
922
+ else:
923
+ # All suggested streams are skipped - this is fine (with warning)
924
+ skip_msg = f"All {len(skipped_streams)} suggested streams skipped via x-airbyte-skip-suggested-streams: {', '.join(skipped_streams)}"
925
+ all_warnings.append(skip_msg)
926
+ checks.append(
927
+ {
928
+ "name": "suggested_streams_coverage",
929
+ "status": "pass",
930
+ "messages": [skip_msg],
931
+ }
932
+ )
933
+ else:
934
+ # No entities defined and no suggested streams - this is fine
935
+ checks.append(
936
+ {
937
+ "name": "suggested_streams_coverage",
938
+ "status": "pass",
939
+ "messages": ["No entities defined, and no suggested streams in registry (skipped)"],
940
+ }
941
+ )
942
+
943
+ # Check 6: Auth methods compatibility
944
+ auth_valid, auth_errors, auth_warnings = validate_auth_methods(raw_spec, connector_name, registry_metadata)
945
+ all_errors.extend(auth_errors)
946
+ all_warnings.extend(auth_warnings)
947
+
948
+ # Determine status: pass, warn (extra methods), or fail (missing methods)
949
+ if not auth_valid:
950
+ auth_status = "fail"
951
+ elif auth_warnings:
952
+ auth_status = "warn"
953
+ else:
954
+ auth_status = "pass"
955
+
956
+ checks.append(
957
+ {
958
+ "name": "auth_methods",
959
+ "status": auth_status,
960
+ "messages": auth_errors + auth_warnings,
961
+ }
962
+ )
963
+
964
+ return {
965
+ "registry_found": True,
966
+ "connector_id_matches": True,
967
+ "checks": checks,
968
+ "errors": all_errors,
969
+ "warnings": all_warnings,
970
+ }