airbyte-cdk 6.45.9__py3-none-any.whl → 6.46.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. airbyte_cdk/cli/__init__.py +9 -1
  2. airbyte_cdk/cli/airbyte_cdk/__init__.py +86 -0
  3. airbyte_cdk/cli/airbyte_cdk/_connector.py +179 -0
  4. airbyte_cdk/cli/airbyte_cdk/_image.py +95 -0
  5. airbyte_cdk/cli/airbyte_cdk/_manifest.py +24 -0
  6. airbyte_cdk/cli/airbyte_cdk/_secrets.py +150 -0
  7. airbyte_cdk/cli/airbyte_cdk/_util.py +43 -0
  8. airbyte_cdk/cli/airbyte_cdk/_version.py +13 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +10 -0
  10. airbyte_cdk/models/connector_metadata.py +97 -0
  11. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +108 -79
  12. airbyte_cdk/sources/declarative/manifest_declarative_source.py +122 -45
  13. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +87 -82
  14. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +9 -0
  15. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +2 -2
  16. airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py +462 -0
  17. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +2 -2
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -24
  19. airbyte_cdk/sources/file_based/file_record_data.py +1 -1
  20. airbyte_cdk/test/standard_tests/connector_base.py +51 -25
  21. airbyte_cdk/test/standard_tests/declarative_sources.py +3 -1
  22. airbyte_cdk/test/standard_tests/test_resources.py +69 -0
  23. airbyte_cdk/test/standard_tests/util.py +79 -0
  24. airbyte_cdk/utils/docker.py +337 -0
  25. airbyte_cdk/utils/docker_image_templates.py +101 -0
  26. {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/METADATA +6 -1
  27. {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/RECORD +31 -18
  28. {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/entry_points.txt +1 -0
  29. {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/LICENSE_SHORT +0 -0
  31. {airbyte_cdk-6.45.9.dist-info → airbyte_cdk-6.46.0.dist-info}/WHEEL +0 -0
airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py (new file)
@@ -0,0 +1,462 @@
+ #
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+ #
+
+ import copy
+ import hashlib
+ import json
+ from collections import defaultdict
+ from itertools import chain
+ from typing import Any, Callable, DefaultDict, Dict, Iterable, List, Optional, Tuple
+
+ from airbyte_cdk.sources.declarative.parsers.custom_exceptions import ManifestNormalizationException
+
+ # Type definitions for better readability
+ ManifestType = Dict[str, Any]
+ DefinitionsType = Dict[str, Any]
+ DuplicateOccurancesType = List[Tuple[List[str], Dict[str, Any], Dict[str, Any]]]
+ DuplicatesType = DefaultDict[str, DuplicateOccurancesType]
+
+ # Configuration constants
+ N_OCCURANCES = 2
+
+ DEF_TAG = "definitions"
+ LINKABLE_TAG = "linkable"
+ LINKED_TAG = "linked"
+ PROPERTIES_TAG = "properties"
+ SCHEMA_LOADER_TAG = "schema_loader"
+ SCHEMA_TAG = "schema"
+ SCHEMAS_TAG = "schemas"
+ STREAMS_TAG = "streams"
+
+
+ def _get_linkable_schema_tags(schema: DefinitionsType) -> List[str]:
+     """
+     Extracts linkable tags from schema definitions.
+     This function identifies properties within a schema's definitions that are marked as linkable.
+     It traverses each definition in the schema, examines its properties, and collects
+     the keys of properties that contain the LINKABLE_TAG.
+
+     Args:
+         schema (DefinitionsType): The schema definition dictionary to process
+
+     Returns:
+         List[str]: A deduplicated list of property keys that are marked as linkable
+     """
+
+     # the linkable scope: ['definitions.*']
+     schema_definitions = schema.get(DEF_TAG, {})
+
+     linkable_tags: List[str] = []
+
+     # Extract linkable keys from properties
+     extract_linkable_keys: Callable[[Dict[str, Dict[str, Any]]], List[str]] = lambda properties: [
+         key for key, value in properties.items() if LINKABLE_TAG in value.keys()
+     ]
+
+     # Process each root value to get its linkable keys
+     process_root: Callable[[Dict[str, Any]], List[str]] = lambda root_value: extract_linkable_keys(
+         root_value.get(PROPERTIES_TAG, {})
+     )
+
+     # Map the process_root function over all schema values and flatten the results
+     all_linkable_tags = chain.from_iterable(map(process_root, schema_definitions.values()))
+
+     # Add all found linkable tags to the tags list
+     linkable_tags.extend(all_linkable_tags)
+
+     # return unique tags only
+     return list(set(linkable_tags))
+
+
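Not part of the diff: a minimal sketch of what `_get_linkable_schema_tags` extracts, using a hypothetical schema fragment (the real `linkable` markers live in declarative_component_schema.yaml):

# Hypothetical definitions fragment; property keys carrying a `linkable` marker are collected.
schema = {
    "definitions": {
        "HttpRequester": {
            "properties": {
                "authenticator": {"linkable": True},  # collected
                "url_base": {},  # ignored: no `linkable` marker
            }
        },
        "SimpleRetriever": {
            "properties": {
                "paginator": {"linkable": True},  # collected
            }
        },
    }
}
assert sorted(_get_linkable_schema_tags(schema)) == ["authenticator", "paginator"]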
+ class ManifestNormalizer:
+     """
+     This class is responsible for normalizing the manifest by applying processing such as:
+     - removing duplicated definitions
+     - replacing them with references.
+
+     To extend the functionality, use the `normalize()` method to include any additional processing steps.
+     """
+
+     def __init__(
+         self,
+         resolved_manifest: ManifestType,
+         declarative_schema: DefinitionsType,
+     ) -> None:
+         self._resolved_manifest = resolved_manifest
+         self._declarative_schema = declarative_schema
+         self._normalized_manifest: ManifestType = copy.deepcopy(self._resolved_manifest)
+         # get the tags marked as `linkable` in the component schema
+         self._linkable_tags = _get_linkable_schema_tags(self._declarative_schema)
+
+     def to_json_str(self) -> str:
+         return json.dumps(self._normalized_manifest, indent=2)
+
+     def normalize(self) -> ManifestType:
+         """
+         Normalizes the manifest by deduplicating and resolving schema references.
+
+         This method processes the manifest in two steps:
+         1. Deduplicates elements within the manifest
+         2. Resolves and references schemas
+
+         Returns:
+             ManifestType: The normalized manifest if processing succeeds,
+             or the original resolved manifest if normalization fails.
+
+         Raises:
+             ManifestNormalizationException: Caught internally and handled by returning the original manifest.
+         """
+         try:
+             self._deduplicate_minifest()
+             self._reference_schemas()
+
+             return self._normalized_manifest
+         except ManifestNormalizationException:
+             # if any error occurs, we just return the original manifest.
+             # TODO: enable debug logging
+             return self._resolved_manifest
+
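A usage sketch (not from the package itself), assuming a `resolved_manifest` dict and the declarative component schema dict are already loaded:

# Sketch: normalize a resolved manifest; on failure, normalize() returns the original.
normalizer = ManifestNormalizer(
    resolved_manifest=resolved_manifest,
    declarative_schema=declarative_schema,
)
normalized = normalizer.normalize()
print(normalizer.to_json_str())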
+     def _get_manifest_streams(self) -> Iterable[Dict[str, Any]]:
+         """
+         Get the streams from the manifest.
+
+         Returns:
+             An Iterable of streams.
+         """
+
+         if STREAMS_TAG in self._normalized_manifest.keys():
+             for stream in self._normalized_manifest[STREAMS_TAG]:
+                 yield stream
+
+         yield from []
+
+     def _deduplicate_minifest(self) -> None:
+         """
+         Find commonalities in the input JSON structure and refactor it to avoid redundancy.
+         """
+
+         try:
+             # prepare the `definitions` tag
+             self._prepare_definitions()
+             # replace duplicates with references, if any
+             self._handle_duplicates(self._collect_duplicates())
+         except Exception as e:
+             raise ManifestNormalizationException(str(e))
+
+     def _prepare_definitions(self) -> None:
+         """
+         Clean the definitions in the manifest by removing unnecessary properties.
+         This function modifies the manifest in place.
+         """
+
+         # Check if the definitions tag exists
+         if not DEF_TAG in self._normalized_manifest:
+             self._normalized_manifest[DEF_TAG] = {}
+
+         # Check if the linked tag exists
+         if not LINKED_TAG in self._normalized_manifest[DEF_TAG]:
+             self._normalized_manifest[DEF_TAG][LINKED_TAG] = {}
+
+         # remove everything from the definitions tag except `linked`, after processing
+         for key in list(self._normalized_manifest[DEF_TAG].keys()):
+             if key != LINKED_TAG:
+                 self._normalized_manifest[DEF_TAG].pop(key, None)
+
+     def _extract_stream_schema(self, stream: Dict[str, Any]) -> None:
+         """
+         Extract the schema from the stream and add it to the `schemas` tag.
+         """
+
+         stream_name = stream["name"]
+         # copy the value of the SCHEMA_TAG to the SCHEMAS_TAG with the stream name as key
+         schema = stream.get(SCHEMA_LOADER_TAG, {}).get(SCHEMA_TAG)
+         if not SCHEMAS_TAG in self._normalized_manifest.keys():
+             self._normalized_manifest[SCHEMAS_TAG] = {}
+         # add stream schema to the SCHEMAS_TAG
+         if not stream_name in self._normalized_manifest[SCHEMAS_TAG].keys():
+             # add the schema to the SCHEMAS_TAG with the stream name as key
+             self._normalized_manifest[SCHEMAS_TAG][stream_name] = schema
+
+     def _reference_schemas(self) -> None:
+         """
+         Set the schema reference for each stream in the manifest.
+         This function modifies the manifest in place.
+         """
+
+         # reference the stream schema for the stream to where it's stored
+         if SCHEMAS_TAG in self._normalized_manifest.keys():
+             for stream in self._get_manifest_streams():
+                 self._extract_stream_schema(stream)
+                 self._set_stream_schema_ref(stream)
+
+     def _set_stream_schema_ref(self, stream: Dict[str, Any]) -> None:
+         """
+         Set the schema reference for the given stream in the manifest.
+         This function modifies the manifest in place.
+         """
+         stream_name = stream["name"]
+         if SCHEMAS_TAG in self._normalized_manifest.keys():
+             if stream_name in self._normalized_manifest[SCHEMAS_TAG]:
+                 stream[SCHEMA_LOADER_TAG][SCHEMA_TAG] = self._create_schema_ref(stream_name)
+
+     def _replace_duplicates_with_refs(self, duplicates: DuplicatesType) -> None:
+         """
+         Process duplicate objects and replace them with references.
+
+         Args:
+             duplicates: The duplicates dictionary collected from the given manifest.
+         """
+
+         for _, occurrences in duplicates.items():
+             type_key, key, value = self._get_occurance_samples(occurrences)
+             is_linked_def = self._is_linked_definition(type_key, key)
+
+             # Add to definitions if not there already
+             if not is_linked_def:
+                 self._add_to_linked_definitions(type_key, key, value)
+
+             # Replace occurrences with references
+             for _, parent_obj, value in occurrences:
+                 if is_linked_def:
+                     if value == self._get_linked_definition_value(type_key, key):
+                         parent_obj[key] = self._create_linked_definition_ref(type_key, key)
+                 else:
+                     parent_obj[key] = self._create_linked_definition_ref(type_key, key)
+
+     def _handle_duplicates(self, duplicates: DuplicatesType) -> None:
+         """
+         Process the duplicates and replace them with references.
+
+         Args:
+             duplicates: The duplicates dictionary collected from the given manifest.
+         """
+
+         if len(duplicates) > 0:
+             self._replace_duplicates_with_refs(duplicates)
+
+     def _add_duplicate(
+         self,
+         duplicates: DuplicatesType,
+         current_path: List[str],
+         obj: Dict[str, Any],
+         value: Any,
+         key: Optional[str] = None,
+     ) -> None:
+         """
+         Adds a duplicate record of an observed object by computing a unique hash for the provided value.
+
+         This function computes a hash for the given value (or a dictionary composed of the key and value if a key is provided)
+         and appends a tuple containing the current path, the original object, and the value to the duplicates
+         dictionary under the corresponding hash.
+
+         Parameters:
+             duplicates (DuplicatesType): The dictionary to store duplicate records.
+             current_path (List[str]): The list of keys or indices representing the current location in the object hierarchy.
+             obj (Dict): The original dictionary object where the duplicate is observed.
+             value (Any): The value to be hashed and used for identifying duplicates.
+             key (Optional[str]): An optional key that, if provided, wraps the value in a dictionary before hashing.
+         """
+
+         # create hash for each duplicate observed
+         value_to_hash = {key: value} if key is not None else value
+         duplicates[self._hash_object(value_to_hash)].append((current_path, obj, value))
+
+     def _add_to_linked_definitions(
+         self,
+         type_key: str,
+         key: str,
+         value: Any,
+     ) -> None:
+         """
+         Add a value to the linked definitions under the specified key.
+
+         Args:
+             type_key: The component type to group the definition under
+             key: The key to use
+             value: The value to add
+         """
+         if type_key not in self._normalized_manifest[DEF_TAG][LINKED_TAG].keys():
+             self._normalized_manifest[DEF_TAG][LINKED_TAG][type_key] = {}
+
+         if key not in self._normalized_manifest[DEF_TAG][LINKED_TAG][type_key].keys():
+             self._normalized_manifest[DEF_TAG][LINKED_TAG][type_key][key] = value
+
+     def _collect_duplicates(self) -> DuplicatesType:
+         """
+         Traverse the JSON object and collect all potential duplicate values and objects.
+
+         Returns:
+             duplicates: A dictionary of duplicate objects.
+         """
+
+         def _collect(obj: Dict[str, Any], path: Optional[List[str]] = None) -> None:
+             """
+             The closure to recursively collect duplicates in the JSON object.
+
+             Args:
+                 obj: The current object being analyzed.
+                 path: The current path in the object hierarchy.
+             """
+
+             if not isinstance(obj, dict):
+                 return
+
+             path = [] if path is None else path
+             for key, value in obj.items():
+                 # do not collect duplicates from `definitions` tag
+                 if key == DEF_TAG:
+                     continue
+
+                 current_path = path + [key]
+
+                 if isinstance(value, dict):
+                     # First process nested dictionaries
+                     _collect(value, current_path)
+                     # Process allowed-only component tags
+                     if key in self._linkable_tags:
+                         self._add_duplicate(duplicates, current_path, obj, value)
+
+                 # handle primitive types
+                 elif isinstance(value, (str, int, float, bool)):
+                     # Process allowed-only field tags
+                     if key in self._linkable_tags:
+                         self._add_duplicate(duplicates, current_path, obj, value, key)
+
+                 # handle list cases
+                 elif isinstance(value, list):
+                     for i, item in enumerate(value):
+                         _collect(item, current_path + [str(i)])
+
+         duplicates: DuplicatesType = defaultdict(list, {})
+         try:
+             if self._linkable_tags:
+                 _collect(self._normalized_manifest)
+                 # clean non-duplicates and sort based on the count of occurrences
+                 return self._clean_and_sort_duplicates(duplicates)
+             return duplicates
+         except Exception as e:
+             raise ManifestNormalizationException(str(e))
+
+     def _clean_and_sort_duplicates(self, duplicates: DuplicatesType) -> DuplicatesType:
+         """
+         Clean non-duplicates and sort the duplicates by their occurrences.
+
+         Args:
+             duplicates: The duplicates dictionary to sort
+
+         Returns:
+             A sorted duplicates dictionary.
+         """
+
+         # clean non-duplicates
+         duplicates = defaultdict(
+             list,
+             {k: v for k, v in duplicates.items() if len(v) >= N_OCCURANCES},
+         )
+
+         # sort the duplicates by their occurrences, more frequent ones go first
+         duplicates = defaultdict(
+             list,
+             {k: v for k, v in sorted(duplicates.items(), key=lambda x: len(x[1]), reverse=True)},
+         )
+
+         return duplicates
+
+     def _hash_object(self, obj: Dict[str, Any]) -> str:
+         """
+         Create a unique hash for a dictionary object.
+
+         Args:
+             obj: The dictionary to hash
+
+         Returns:
+             A hashed string
+         """
+
+         # Sort keys to ensure a consistent hash for the same content
+         return hashlib.md5(json.dumps(obj, sort_keys=True).encode()).hexdigest()
+
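Because `_hash_object` serializes with `sort_keys=True`, structurally equal dicts hash identically regardless of key order; a standalone sketch of the same technique:

import hashlib
import json

def hash_object(obj) -> str:
    # Same approach as _hash_object above: canonical key order gives a stable digest.
    return hashlib.md5(json.dumps(obj, sort_keys=True).encode()).hexdigest()

a = {"type": "DefaultPaginator", "page_size": 100}
b = {"page_size": 100, "type": "DefaultPaginator"}
assert hash_object(a) == hash_object(b)  # key order does not matter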
+     def _is_linked_definition(self, type_key: str, key: str) -> bool:
+         """
+         Check if the key already exists in the linked definitions.
+
+         Args:
+             type_key: The component type to look under
+             key: The key to check
+
+         Returns:
+             True if the key exists in the linked definitions, False otherwise
+         """
+
+         if type_key in self._normalized_manifest[DEF_TAG][LINKED_TAG].keys():
+             # Check if the key exists in the linked definitions
+             if key in self._normalized_manifest[DEF_TAG][LINKED_TAG][type_key].keys():
+                 return True
+
+         return False
+
+     def _get_linked_definition_value(self, type_key: str, key: str) -> Any:
+         """
+         Get the value of a linked definition by its key.
+
+         Args:
+             type_key: The component type to look under
+             key: The key to check
+
+         Returns:
+             The value of the linked definition
+         """
+         if type_key in self._normalized_manifest[DEF_TAG][LINKED_TAG].keys():
+             if key in self._normalized_manifest[DEF_TAG][LINKED_TAG][type_key].keys():
+                 return self._normalized_manifest[DEF_TAG][LINKED_TAG][type_key][key]
+         else:
+             raise ManifestNormalizationException(
+                 f"Key {key} not found in linked definitions. Please check the manifest."
+             )
+
+     def _get_occurance_samples(self, occurrences: DuplicateOccurancesType) -> Tuple[str, str, Any]:
+         """
+         Get a sample (type, key, value) from the occurrences list.
+
+         Args:
+             occurrences: The occurrences list
+
+         Returns:
+             The type, key and value from the occurrences
+         """
+
+         # Take the value from the first occurrence, as they are the same
+         path, obj, value = occurrences[0]
+         return (
+             obj["type"],
+             path[-1],
+             value,
+         )  # Return the component's name as the last part of its path
+
+     def _create_linked_definition_ref(self, type_key: str, key: str) -> Dict[str, str]:
+         """
+         Create a reference object for the linked definitions using the specified key.
+
+         Args:
+             type_key: The component type under `definitions.linked`
+             key: The reference key to use
+
+         Returns:
+             A reference object in the proper format
+         """
+
+         return {"$ref": f"#/{DEF_TAG}/{LINKED_TAG}/{type_key}/{key}"}
+
+     def _create_schema_ref(self, key: str) -> Dict[str, str]:
+         """
+         Create a reference object for a stream schema using the specified key.
+
+         Args:
+             key: The reference key to use
+
+         Returns:
+             A reference object in the proper format
+         """
+
+         return {"$ref": f"#/{SCHEMAS_TAG}/{key}"}
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py
@@ -3,7 +3,7 @@
  #

  import re
- from typing import Any, Mapping, Set, Tuple, Union
+ from typing import Any, Dict, Mapping, Set, Tuple, Union

  from airbyte_cdk.sources.declarative.parsers.custom_exceptions import (
      CircularReferenceException,
@@ -99,7 +99,7 @@ class ManifestReferenceResolver:
      until we find a key with the given path, or until there is nothing to traverse.
      """

-     def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
+     def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Dict[str, Any]:
          """
          :param manifest: incoming manifest that could have references to previously defined components
          :return:
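The change only narrows the declared return type from `Mapping` to `Dict`; behavior is unchanged. For orientation, `preprocess_manifest` inlines `$ref` pointers such as those the normalizer emits; a rough sketch of the expected round trip, assuming a simple manifest:

# Hypothetical input: a stream component referencing a shared definition.
manifest = {
    "definitions": {
        "requester_base": {"type": "HttpRequester", "url_base": "https://api.example.com"},
    },
    "streams": [{"retriever": {"requester": {"$ref": "#/definitions/requester_base"}}}],
}
resolved = ManifestReferenceResolver().preprocess_manifest(manifest)
assert resolved["streams"][0]["retriever"]["requester"]["url_base"] == "https://api.example.com"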
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -2194,11 +2194,20 @@ class ModelToComponentFactory:

          api_budget = self._api_budget

+         # Removes QueryProperties components from the interpolated mappings because it has been designed
+         # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly
+         # instead of through jinja interpolation
+         request_parameters: Optional[Union[str, Mapping[str, str]]]
+         if isinstance(model.request_parameters, Mapping):
+             request_parameters = self._remove_query_properties(model.request_parameters)
+         else:
+             request_parameters = model.request_parameters
+
          request_options_provider = InterpolatedRequestOptionsProvider(
              request_body_data=model.request_body_data,
              request_body_json=model.request_body_json,
              request_headers=model.request_headers,
-             request_parameters=model.request_parameters,
+             request_parameters=request_parameters,
              query_properties_key=query_properties_key,
              config=config,
              parameters=model.parameters or {},
@@ -2818,6 +2827,10 @@
              else None
          )

+         if model.transform_before_filtering is None:
+             # default to False if not set
+             model.transform_before_filtering = False
+
          assert model.transform_before_filtering is not None  # for mypy

          transform_before_filtering = model.transform_before_filtering
@@ -2832,6 +2845,10 @@
          )
              transform_before_filtering = True

+         if model.schema_normalization is None:
+             # default to no schema normalization if not set
+             model.schema_normalization = SchemaNormalizationModel.None_
+
          schema_normalization = (
              TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
              if isinstance(model.schema_normalization, SchemaNormalizationModel)
@@ -2938,16 +2955,9 @@
              # When translating JSON schema into Pydantic models, enforcing types for arrays containing both
              # concrete string complex object definitions like QueryProperties would get resolved to Union[str, Any].
              # This adds the extra validation that we couldn't get for free in Pydantic model generation
-             if (
-                 isinstance(request_parameter, Mapping)
-                 and request_parameter.get("type") == "QueryProperties"
-             ):
+             if isinstance(request_parameter, QueryPropertiesModel):
                  query_properties_key = key
                  query_properties_definitions.append(request_parameter)
-             elif not isinstance(request_parameter, str):
-                 raise ValueError(
-                     f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}"
-                 )

          if len(query_properties_definitions) > 1:
              raise ValueError(
@@ -2955,17 +2965,8 @@
          )

          if len(query_properties_definitions) == 1:
-             query_properties = self.create_component(
-                 model_type=QueryPropertiesModel,
-                 component_definition=query_properties_definitions[0],
-                 config=config,
-             )
-
-             # Removes QueryProperties components from the interpolated mappings because it will be resolved in
-             # the provider from the slice directly instead of through jinja interpolation
-             if isinstance(model.requester.request_parameters, Mapping):
-                 model.requester.request_parameters = self._remove_query_properties(
-                     model.requester.request_parameters
+             query_properties = self._create_component_from_model(
+                 model=query_properties_definitions[0], config=config
              )

          requester = self._create_component_from_model(
@@ -3088,13 +3089,12 @@

      @staticmethod
      def _remove_query_properties(
-         request_parameters: Mapping[str, Union[Any, str]],
-     ) -> Mapping[str, Union[Any, str]]:
+         request_parameters: Mapping[str, Union[str, QueryPropertiesModel]],
+     ) -> Mapping[str, str]:
          return {
              parameter_field: request_parameter
              for parameter_field, request_parameter in request_parameters.items()
-             if not isinstance(request_parameter, Mapping)
-             or not request_parameter.get("type") == "QueryProperties"
+             if not isinstance(request_parameter, QueryPropertiesModel)
          }

      def create_state_delegating_stream(
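Net effect of these hunks: QueryProperties entries are now detected with `isinstance(..., QueryPropertiesModel)` against the parsed Pydantic models instead of raw `{"type": "QueryProperties"}` mappings, and they are stripped from `request_parameters` before interpolation. A standalone sketch of the filtering, with a stand-in class for the real model:

from typing import Mapping, Union

class QueryPropertiesModel:  # stand-in for the real Pydantic model
    pass

def remove_query_properties(
    request_parameters: Mapping[str, Union[str, QueryPropertiesModel]],
) -> Mapping[str, str]:
    # Keep plain string parameters; QueryProperties are resolved by the retriever instead.
    return {
        field: value
        for field, value in request_parameters.items()
        if not isinstance(value, QueryPropertiesModel)
    }

params = {"page_size": "100", "properties": QueryPropertiesModel()}
assert remove_query_properties(params) == {"page_size": "100"}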
airbyte_cdk/sources/file_based/file_record_data.py
@@ -14,7 +14,7 @@ class FileRecordData(BaseModel):
      """

      folder: str
-     filename: str
+     file_name: str
      bytes: int
      source_uri: str
      id: Optional[str] = None
airbyte_cdk/test/standard_tests/connector_base.py
@@ -4,7 +4,9 @@
  from __future__ import annotations

  import abc
+ import importlib
  import inspect
+ import os
  import sys
  from collections.abc import Callable
  from pathlib import Path
@@ -22,17 +24,61 @@ from airbyte_cdk.test.standard_tests._job_runner import IConnector, run_test_job
  from airbyte_cdk.test.standard_tests.models import (
      ConnectorTestScenario,
  )
-
- ACCEPTANCE_TEST_CONFIG = "acceptance-test-config.yml"
- MANIFEST_YAML = "manifest.yaml"
+ from airbyte_cdk.test.standard_tests.test_resources import (
+     ACCEPTANCE_TEST_CONFIG,
+     find_connector_root,
+ )


  class ConnectorTestSuiteBase(abc.ABC):
      """Base class for connector test suites."""

-     connector: type[IConnector] | Callable[[], IConnector] | None = None
+     connector: type[IConnector] | Callable[[], IConnector] | None  # type: ignore [reportRedeclaration]
      """The connector class or a factory function that returns an instance of IConnector."""

+     @classproperty  # type: ignore [no-redef]
+     def connector(cls) -> type[IConnector] | Callable[[], IConnector] | None:
+         """Get the connector class for the test suite.
+
+         This assumes a python connector and should be overridden by subclasses to provide the
+         specific connector class to be tested.
+         """
+         connector_root = cls.get_connector_root_dir()
+         connector_name = connector_root.absolute().name
+
+         expected_module_name = connector_name.replace("-", "_").lower()
+         expected_class_name = connector_name.replace("-", "_").title().replace("_", "")
+
+         # dynamically import and get the connector class: <expected_module_name>.<expected_class_name>
+
+         cwd_snapshot = Path().absolute()
+         os.chdir(connector_root)
+
+         # Dynamically import the module
+         try:
+             module = importlib.import_module(expected_module_name)
+         except ModuleNotFoundError as e:
+             raise ImportError(f"Could not import module '{expected_module_name}'.") from e
+         finally:
+             # Change back to the original working directory
+             os.chdir(cwd_snapshot)
+
+         # Dynamically get the class from the module
+         try:
+             return cast(type[IConnector], getattr(module, expected_class_name))
+         except AttributeError as e:
+             # We did not find it based on our expectations, so let's check if we can find it
+             # with a case-insensitive match.
+             matching_class_name = next(
+                 (name for name in dir(module) if name.lower() == expected_class_name.lower()),
+                 None,
+             )
+             if not matching_class_name:
+                 raise ImportError(
+                     f"Module '{expected_module_name}' does not have a class named '{expected_class_name}'."
+                 ) from e
+             return cast(type[IConnector], getattr(module, matching_class_name))
+
      @classmethod
      def get_test_class_dir(cls) -> Path:
          """Get the file path that contains the class."""
@@ -81,27 +127,7 @@ class ConnectorTestSuiteBase(abc.ABC):
      @classmethod
      def get_connector_root_dir(cls) -> Path:
          """Get the root directory of the connector."""
-         for parent in cls.get_test_class_dir().parents:
-             if (parent / MANIFEST_YAML).exists():
-                 return parent
-             if (parent / ACCEPTANCE_TEST_CONFIG).exists():
-                 return parent
-             if parent.name == "airbyte_cdk":
-                 break
-         # If we reach here, we didn't find the manifest file in any parent directory
-         # Check if the manifest file exists in the current directory
-         for parent in Path.cwd().parents:
-             if (parent / MANIFEST_YAML).exists():
-                 return parent
-             if (parent / ACCEPTANCE_TEST_CONFIG).exists():
-                 return parent
-             if parent.name == "airbyte_cdk":
-                 break
-
-         raise FileNotFoundError(
-             "Could not find connector root directory relative to "
-             f"'{str(cls.get_test_class_dir())}' or '{str(Path.cwd())}'."
-         )
+         return find_connector_root([cls.get_test_class_dir(), Path.cwd()])

      @classproperty
      def acceptance_test_config_path(cls) -> Path:
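The removed search loops now live behind `find_connector_root` in the new `test_resources` module (file 22 in the list above). Judging from the deleted code, its behavior is roughly the following sketch; the actual implementation may differ:

from pathlib import Path

MANIFEST_YAML = "manifest.yaml"
ACCEPTANCE_TEST_CONFIG = "acceptance-test-config.yml"

def find_connector_root_sketch(from_paths: list[Path]) -> Path:
    """Walk up from each starting path until a connector marker file is found."""
    for start in from_paths:
        for parent in start.absolute().parents:
            if (parent / MANIFEST_YAML).exists() or (parent / ACCEPTANCE_TEST_CONFIG).exists():
                return parent
            if parent.name == "airbyte_cdk":
                break  # stop before walking out of the CDK tree
    raise FileNotFoundError(f"Could not find connector root relative to {from_paths}.")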